From 2c907fe3df6ed58c4fb2798cbff840ecd1fc9bd4 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:28:55 +0000 Subject: [PATCH 001/292] added communities creation --- backend/src/create_communities.py | 132 ++++++++++++++++++++++++++++++ backend/src/llm.py | 2 + backend/src/shared/common_fn.py | 4 +- 3 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 backend/src/create_communities.py diff --git a/backend/src/create_communities.py b/backend/src/create_communities.py new file mode 100644 index 000000000..816dc9765 --- /dev/null +++ b/backend/src/create_communities.py @@ -0,0 +1,132 @@ +import os +from langchain_community.graphs import Neo4jGraph +from graphdatascience import GraphDataScience +import pandas as pd +import numpy as np +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from src.llm import get_llm + +import os +import getpass +from neo4j import GraphDatabase, Result +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import tiktoken +import numpy as np +from langchain_openai import OpenAIEmbeddings, ChatOpenAI +from langchain_community.vectorstores import Neo4jVector +from langchain_community.graphs import Neo4jGraph +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser + +from tqdm import tqdm +from src.shared.common_fn import load_embedding_model + + +from typing import Dict, Any +from concurrent.futures import ThreadPoolExecutor, as_completed +from tqdm import tqdm +from src.llm import get_llm + +import logging +from graphdatascience import GraphDataScience + +COMMUNITY_PROJECT_NAME = "communities" +NODE_PROJECTION = "__Entity__" +CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" + +def get_gds_driver(uri, username, password, database): + try: + gds 
= GraphDataScience( + endpoint=uri, + auth=(username, password), + database=database + ) + logging.info("Successfully created GDS driver.") + return gds + except Exception as e: + logging.error(f"Failed to create GDS driver: {e}") + raise + +def create_community_graph_project(gds, project_name=COMMUNITY_PROJECT_NAME, node_projection=NODE_PROJECTION): + try: + existing_projects = gds.graph.list() + project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any() + + if project_exists: + logging.info(f"Project '{project_name}' already exists. Dropping it.") + gds.graph.drop(project_name) + + logging.info(f"Creating new graph project '{project_name}'.") + graph_project, result = gds.graph.project( + project_name, + node_projection, + { + "_ALL_": { + "type": "*", + "orientation": "UNDIRECTED", + "properties": { + "weight": { + "property": "*", + "aggregation": "COUNT" + } + } + } + } + ) + logging.info(f"Graph project '{project_name}' created successfully.") + return graph_project, result + except Exception as e: + logging.error(f"Failed to create community graph project: {e}") + raise + +def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECT_NAME): + try: + logging.info(f"Writing communities to the graph project '{project_name}'.") + gds.leiden.write( + graph_project, + writeProperty=project_name, + includeIntermediateCommunities=True, + relationshipWeightProperty="weight" + ) + logging.info("Communities written successfully.") + return True + except Exception as e: + logging.error(f"Failed to write communities: {e}") + return False + + + + +def create_communities(uri, username, password, database,graph): + try: + gds = get_gds_driver(uri, username, password, database) + graph_project, result = create_community_graph_project(gds) + + if write_communities(gds, graph_project): + logging.info("Applying community constraint to the graph.") + graph.query(CREATE_COMMUNITY_CONSTRAINT) + # Optionally, you can call 
create_community_properties() here if needed + logging.info("Communities creation process completed successfully.") + else: + logging.warning("Failed to write communities. Constraint was not applied.") + except Exception as e: + logging.error(f"Failed to create communities: {e}") + + + +# def create_communities(uri,username,password,database,graph): +# gds = get_gds_driver(uri,username,password,database) +# graph_project,result = create_community_graph_project(gds) +# result1 = write_communities(gds,graph_project) +# if result1: +# graph.query(CREATE_COMMUNITY_CONSTRAINT) +# # create_community_properties() + + + + + + diff --git a/backend/src/llm.py b/backend/src/llm.py index 0ee61b650..505bb89fb 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -153,9 +153,11 @@ def get_graph_document_list( node_properties = False else: node_properties = ["description"] + relationship_properties = ["description"] llm_transformer = LLMGraphTransformer( llm=llm, node_properties=node_properties, + relationship_properties=relationship_properties, allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, ) diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index b0dbe2f5f..6d24912c7 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str): return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - # graph.add_graph_documents(graph_document_list, baseEntityLabel=True) - graph.add_graph_documents(graph_document_list) + graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + # graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): for graph_document in graph_document_list: From 7081f13b5730b3ec3d60637a3df50d31176c3220 Mon Sep 17 00:00:00 2001 From: 
vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:49:50 +0000 Subject: [PATCH 002/292] added communities --- backend/score.py | 8 + backend/src/communities.py | 253 +++ backend/src/create_communities.py | 132 -- experiments/graphrag.ipynb | 3519 +++++++++++++++++++++++++++++ 4 files changed, 3780 insertions(+), 132 deletions(-) create mode 100644 backend/src/communities.py delete mode 100644 backend/src/create_communities.py create mode 100644 experiments/graphrag.ipynb diff --git a/backend/score.py b/backend/score.py index 6f5113db6..7a2ab16c7 100644 --- a/backend/score.py +++ b/backend/score.py @@ -16,6 +16,7 @@ from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_fulltext, create_entity_embedding from sse_starlette.sse import EventSourceResponse +from src.communities import create_communities import json from typing import List, Mapping from starlette.middleware.sessions import SessionMiddleware @@ -241,6 +242,13 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) + if "create_communities" in tasks: + model = "openai-gpt-4o" + await asyncio.to_thread(create_communities, uri, userName, password, database,graph,model) + josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(josn_obj) + logging.info(f'created communities') + if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) josn_obj = {'api_name': 'post_processing/materialize_text_chunk_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} diff --git a/backend/src/communities.py b/backend/src/communities.py new file mode 100644 index 000000000..e2f09cba6 --- /dev/null +++ 
b/backend/src/communities.py @@ -0,0 +1,253 @@ +import logging +from graphdatascience import GraphDataScience +from src.llm import get_llm +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor, as_completed +from tqdm import tqdm + + +COMMUNITY_PROJECT_NAME = "communities" +NODE_PROJECTION = "__Entity__" +MAX_WORKERS = 10 + +CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" +CREATE_COMMUNITY_LEVELS = """ +MATCH (e:`__Entity__`) +UNWIND range(0, size(e.communities) - 1 , 1) AS index +CALL { + WITH e, index + WITH e, index + WHERE index = 0 + MERGE (c:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])}) + ON CREATE SET c.level = index + MERGE (e)-[:IN_COMMUNITY]->(c) + RETURN count(*) AS count_0 +} +CALL { + WITH e, index + WITH e, index + WHERE index > 0 + MERGE (current:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])}) + ON CREATE SET current.level = index + MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])}) + ON CREATE SET previous.level = index - 1 + MERGE (previous)-[:IN_COMMUNITY]->(current) + RETURN count(*) AS count_1 +} +RETURN count(*) +""" +CREATE_COMMUNITY_RANKS = """ +MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:__Entity__)<-[:MENTIONS]-(d:Document) +WITH c, count(distinct d) AS rank +SET c.community_rank = rank; +""" + +CREATE_COMMUNITY_WEIGHTS = """ +MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c) +WITH n, count(distinct c) AS chunkCount +SET n.weight = chunkCount""" + +GET_COMMUNITY_INFO = """ +MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:__Entity__) +WHERE c.level IN [0,1,4] +WITH c, collect(e ) AS nodes +WHERE size(nodes) > 1 +CALL apoc.path.subgraphAll(nodes[0], { + whitelistNodes:nodes +}) +YIELD relationships +RETURN c.id AS 
communityId, + [n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes, + [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id, description: r.description}] AS rels +""" + +STORE_COMMUNITY_SUMMARIES = """ +UNWIND $data AS row +MERGE (c:__Community__ {id:row.community}) +SET c.summary = row.summary +""" + +COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, +generate a natural language summary of the provided information: +{community_info} + +Summary:""" + + + +def get_gds_driver(uri, username, password, database): + try: + gds = GraphDataScience( + endpoint=uri, + auth=(username, password), + database=database + ) + logging.info("Successfully created GDS driver.") + return gds + except Exception as e: + logging.error(f"Failed to create GDS driver: {e}") + raise + +def create_community_graph_project(gds, project_name=COMMUNITY_PROJECT_NAME, node_projection=NODE_PROJECTION): + try: + existing_projects = gds.graph.list() + project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any() + + if project_exists: + logging.info(f"Project '{project_name}' already exists. 
Dropping it.") + gds.graph.drop(project_name) + + logging.info(f"Creating new graph project '{project_name}'.") + graph_project, result = gds.graph.project( + project_name, + node_projection, + { + "_ALL_": { + "type": "*", + "orientation": "UNDIRECTED", + "properties": { + "weight": { + "property": "*", + "aggregation": "COUNT" + } + } + } + } + ) + logging.info(f"Graph project '{project_name}' created successfully.") + return graph_project, result + except Exception as e: + logging.error(f"Failed to create community graph project: {e}") + raise + +def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECT_NAME): + try: + logging.info(f"Writing communities to the graph project '{project_name}'.") + gds.leiden.write( + graph_project, + writeProperty=project_name, + includeIntermediateCommunities=True, + relationshipWeightProperty="weight" + ) + logging.info("Communities written successfully.") + return True + except Exception as e: + logging.error(f"Failed to write communities: {e}") + return False + + +def get_community_chain(model, community_template=COMMUNITY_TEMPLATE): + try: + llm, model_name = get_llm(model) + community_prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "Given input triples, generate the information summary. 
No pre-amble.", + ), + ("human", community_template), + ] + ) + + community_chain = community_prompt | llm | StrOutputParser() + return community_chain + except Exception as e: + logging.error(f"Failed to create community chain: {e}") + raise + + +def prepare_string(community_data): + try: + nodes_description = "Nodes are:\n" + for node in community_data['nodes']: + node_id = node['id'] + node_type = node['type'] + node_description = f", description: {node['description']}" if 'description' in node and node['description'] else "" + nodes_description += f"id: {node_id}, type: {node_type}{node_description}\n" + + relationships_description = "Relationships are:\n" + for rel in community_data['rels']: + start_node = rel['start'] + end_node = rel['end'] + relationship_type = rel['type'] + relationship_description = f", description: {rel['description']}" if 'description' in rel and rel['description'] else "" + relationships_description += f"({start_node})-[:{relationship_type}]->({end_node}){relationship_description}\n" + + return nodes_description + "\n" + relationships_description + except Exception as e: + logging.error(f"Failed to prepare string from community data: {e}") + raise + +def process_community(community, community_chain): + try: + formatted_community_info = prepare_string(community) + summary = community_chain.invoke({'community_info': formatted_community_info}) + return {"community": community['communityId'], "summary": summary} + except Exception as e: + logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") + raise + +def create_community_summaries(graph, model): + try: + community_info_list = graph.query(GET_COMMUNITY_INFO) + community_chain = get_community_chain(model) + + summaries = [] + with ThreadPoolExecutor() as executor: + futures = {executor.submit(process_community, community, community_chain): community for community in community_info_list} + + for future in tqdm(as_completed(futures), total=len(futures), 
desc="Processing communities"): + try: + summaries.append(future.result()) + except Exception as e: + logging.error(f"Failed to retrieve result for a community: {e}") + + graph.query(STORE_COMMUNITY_SUMMARIES, params={"data": summaries}) + except Exception as e: + logging.error(f"Failed to create community summaries: {e}") + raise + + +def create_community_properties(graph, model): + try: + # Create community levels + graph.query(CREATE_COMMUNITY_LEVELS) + logging.info("Successfully created community levels.") + + # Create community ranks + graph.query(CREATE_COMMUNITY_RANKS) + logging.info("Successfully created community ranks.") + + # Create community weights + graph.query(CREATE_COMMUNITY_WEIGHTS) + logging.info("Successfully created community weights.") + + # Create community summaries + create_community_summaries(graph, model) + logging.info("Successfully created community summaries.") + except Exception as e: + logging.error(f"Failed to create community properties: {e}") + raise + +def create_communities(uri, username, password, database,graph,model): + try: + gds = get_gds_driver(uri, username, password, database) + graph_project, result = create_community_graph_project(gds) + write_communities_sucess = write_communities(gds, graph_project) + if write_communities_sucess: + logging.info("Applying community constraint to the graph.") + graph.query(CREATE_COMMUNITY_CONSTRAINT) + create_community_properties(graph,model) + logging.info("Communities creation process completed successfully.") + else: + logging.warning("Failed to write communities. 
Constraint was not applied.") + except Exception as e: + logging.error(f"Failed to create communities: {e}") + + + + + + diff --git a/backend/src/create_communities.py b/backend/src/create_communities.py deleted file mode 100644 index 816dc9765..000000000 --- a/backend/src/create_communities.py +++ /dev/null @@ -1,132 +0,0 @@ -import os -from langchain_community.graphs import Neo4jGraph -from graphdatascience import GraphDataScience -import pandas as pd -import numpy as np -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.output_parsers import StrOutputParser -from src.llm import get_llm - -import os -import getpass -from neo4j import GraphDatabase, Result -import pandas as pd -import seaborn as sns -import matplotlib.pyplot as plt -import tiktoken -import numpy as np -from langchain_openai import OpenAIEmbeddings, ChatOpenAI -from langchain_community.vectorstores import Neo4jVector -from langchain_community.graphs import Neo4jGraph -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.output_parsers import StrOutputParser - -from tqdm import tqdm -from src.shared.common_fn import load_embedding_model - - -from typing import Dict, Any -from concurrent.futures import ThreadPoolExecutor, as_completed -from tqdm import tqdm -from src.llm import get_llm - -import logging -from graphdatascience import GraphDataScience - -COMMUNITY_PROJECT_NAME = "communities" -NODE_PROJECTION = "__Entity__" -CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" - -def get_gds_driver(uri, username, password, database): - try: - gds = GraphDataScience( - endpoint=uri, - auth=(username, password), - database=database - ) - logging.info("Successfully created GDS driver.") - return gds - except Exception as e: - logging.error(f"Failed to create GDS driver: {e}") - raise - -def create_community_graph_project(gds, project_name=COMMUNITY_PROJECT_NAME, node_projection=NODE_PROJECTION): - try: 
- existing_projects = gds.graph.list() - project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any() - - if project_exists: - logging.info(f"Project '{project_name}' already exists. Dropping it.") - gds.graph.drop(project_name) - - logging.info(f"Creating new graph project '{project_name}'.") - graph_project, result = gds.graph.project( - project_name, - node_projection, - { - "_ALL_": { - "type": "*", - "orientation": "UNDIRECTED", - "properties": { - "weight": { - "property": "*", - "aggregation": "COUNT" - } - } - } - } - ) - logging.info(f"Graph project '{project_name}' created successfully.") - return graph_project, result - except Exception as e: - logging.error(f"Failed to create community graph project: {e}") - raise - -def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECT_NAME): - try: - logging.info(f"Writing communities to the graph project '{project_name}'.") - gds.leiden.write( - graph_project, - writeProperty=project_name, - includeIntermediateCommunities=True, - relationshipWeightProperty="weight" - ) - logging.info("Communities written successfully.") - return True - except Exception as e: - logging.error(f"Failed to write communities: {e}") - return False - - - - -def create_communities(uri, username, password, database,graph): - try: - gds = get_gds_driver(uri, username, password, database) - graph_project, result = create_community_graph_project(gds) - - if write_communities(gds, graph_project): - logging.info("Applying community constraint to the graph.") - graph.query(CREATE_COMMUNITY_CONSTRAINT) - # Optionally, you can call create_community_properties() here if needed - logging.info("Communities creation process completed successfully.") - else: - logging.warning("Failed to write communities. 
Constraint was not applied.") - except Exception as e: - logging.error(f"Failed to create communities: {e}") - - - -# def create_communities(uri,username,password,database,graph): -# gds = get_gds_driver(uri,username,password,database) -# graph_project,result = create_community_graph_project(gds) -# result1 = write_communities(gds,graph_project) -# if result1: -# graph.query(CREATE_COMMUNITY_CONSTRAINT) -# # create_community_properties() - - - - - - diff --git a/experiments/graphrag.ipynb b/experiments/graphrag.ipynb new file mode 100644 index 000000000..a3f99796a --- /dev/null +++ b/experiments/graphrag.ipynb @@ -0,0 +1,3519 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install --quiet --upgrade langchain-community langchain-experimental langchain-openai neo4j graphdatascience tiktoken retry" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspaces/llm-graph-builder/chatbotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "from langchain_community.graphs import Neo4jGraph\n", + "from graphdatascience import GraphDataScience\n", + "import pandas as pd\n", + "import numpy as np\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from src.llm import get_llm\n", + "\n", + "import os\n", + "import getpass\n", + "from neo4j import GraphDatabase, Result\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import tiktoken\n", + "import numpy as np\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_community.vectorstores import Neo4jVector\n", + "from langchain_community.graphs import Neo4jGraph\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "# from llama_index.core.schema import TextNode\n", + "# from llama_index.core.vector_stores.utils import node_to_metadata_dict\n", + "# from llama_index.vector_stores.neo4jvector import Neo4jVectorStore\n", + "# from llama_index.core import VectorStoreIndex\n", + "from tqdm import tqdm\n", + "from src.shared.common_fn import load_embedding_model\n", + "\n", + "\n", + "from typing import Dict, Any\n", + "from concurrent.futures import ThreadPoolExecutor, as_completed\n", + "from tqdm import tqdm\n", + "from src.llm import get_llm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"NEO4J_URI\"] = \"\"\n", + "os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n", + "os.environ[\"NEO4J_PASSWORD\"] = \"\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('advertisedListenAddress' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n", + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('serverLocation' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n" + ] + } + ], + "source": [ + "gds = GraphDataScience(\n", + " os.environ[\"NEO4J_URI\"],\n", + " auth=(os.environ[\"NEO4J_USERNAME\"], os.environ[\"NEO4J_PASSWORD\"])\n", + ")\n", + "\n", + "graph = Neo4jGraph() \n", + "\n", + "driver = GraphDatabase.driver(os.environ[\"NEO4J_URI\"], auth=(os.environ[\"NEO4J_USERNAME\"], os.environ[\"NEO4J_PASSWORD\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'type': 'node', 'total_count': 185, 'non_null_descriptions': 146},\n", + " {'type': 'relationship', 'total_count': 14614, 'non_null_descriptions': 41}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"\"\"\n", + "MATCH (n:`__Entity__`)\n", + "RETURN \"node\" AS type,\n", + " count(*) AS total_count,\n", + " count(n.description) AS non_null_descriptions\n", + "UNION ALL\n", + "MATCH (n)-[r:!MENTIONS]->()\n", + "RETURN \"relationship\" AS type,\n", + " count(*) AS total_count,\n", + " 
count(r.description) AS non_null_descriptions\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('schema' returned by 'gds.graph.list' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.list()'\n" + ] + } + ], + "source": [ + "available_graphs = gds.graph.list()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "available_graphs.drop([0,1],inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
degreeDistributiongraphNamedatabasedatabaseLocationmemoryUsagesizeInBytesnodeCountrelationshipCountconfigurationdensitycreationTimemodificationTimeschemaschemaWithOrientation
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [degreeDistribution, graphName, database, databaseLocation, memoryUsage, sizeInBytes, nodeCount, relationshipCount, configuration, density, creationTime, modificationTime, schema, schemaWithOrientation]\n", + "Index: []" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "available_graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "available_graphs[\"graphName\"].str.contains('communities', regex=False).any()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. 
('schema' returned by 'gds.graph.drop' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.drop($graph_name, $fail_if_missing, $db_name)'\n" + ] + }, + { + "data": { + "text/plain": [ + "graphName communities\n", + "database neo4j\n", + "databaseLocation local\n", + "memoryUsage \n", + "sizeInBytes -1\n", + "nodeCount 185\n", + "relationshipCount 366\n", + "configuration {'relationshipProjection': {'_ALL_': {'aggrega...\n", + "density 0.010752\n", + "creationTime 2024-08-19T12:41:43.997435629+00:00\n", + "modificationTime 2024-08-19T12:41:44.127124307+00:00\n", + "schema {'graphProperties': {}, 'nodes': {'__Entity__'...\n", + "schemaWithOrientation {'graphProperties': {}, 'nodes': {'__Entity__'...\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gds.graph.drop(\"communities\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "G, result = gds.graph.project(\n", + " \"communities\", # Graph name\n", + " \"__Entity__\", # Node projection\n", + " {\n", + " \"_ALL_\": {\n", + " \"type\": \"*\",\n", + " \"orientation\": \"UNDIRECTED\",\n", + " \"properties\": {\"weight\": {\"property\": \"*\", \"aggregation\": \"COUNT\"}},\n", + " }\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nodeProjection {'__Entity__': {'label': '__Entity__', 'proper...\n", + "relationshipProjection {'_ALL_': {'aggregation': 'DEFAULT', 'orientat...\n", + "graphName communities\n", + "nodeCount 185\n", + "relationshipCount 366\n", + "projectMillis 15\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('schema' returned by 'gds.graph.list' is deprecated.)} {position: line: 1, column: 106, offset: 105} for query: 'CALL gds.graph.list($graph_name) YIELD graphName, nodeCount, relationshipCount, database, configuration, schema, memoryUsage'\n" + ] + }, + { + "data": { + "text/plain": [ + "Graph({'graphName': 'communities', 'nodeCount': 185, 'relationshipCount': 366, 'database': 'neo4j', 'configuration': {'relationshipProjection': {'_ALL_': {'aggregation': 'DEFAULT', 'orientation': 'UNDIRECTED', 'indexInverse': False, 'properties': {'weight': {'aggregation': 'COUNT', 'property': '*', 'defaultValue': None}}, 'type': '*'}}, 'readConcurrency': 4, 'relationshipProperties': {}, 'nodeProperties': {}, 'jobId': 'ced0b057-a717-48d8-8b90-842f6f2aa001', 'nodeProjection': {'__Entity__': {'label': '__Entity__', 'properties': {}}}, 'logProgress': True, 'creationTime': neo4j.time.DateTime(2024, 8, 19, 13, 10, 3, 641071700, tzinfo=), 'validateRelationships': False, 'sudo': False}, 'schema': {'graphProperties': {}, 'nodes': {'__Entity__': {}}, 'relationships': {'_ALL_': {'weight': 'Float (DefaultValue(NaN), PERSISTENT, Aggregation.COUNT)'}}}, 'memoryUsage': '2316 KiB'})" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'COMMUNITY_PROJECT_NAME'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"community_project_name\".upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Component count: 14\n", + "Component distribution: {'min': 1, 'p5': 1, 'max': 155, 'p999': 155, 'p99': 155, 'p1': 1, 'p10': 1, 'p90': 3, 'p50': 1, 'p25': 1, 'p75': 2, 'p95': 155, 'mean': 12.285714285714286}\n" + ] + } + ], + "source": [ + "wcc = gds.wcc.stats(G)\n", + "print(f\"Component count: {wcc['componentCount']}\")\n", + "print(f\"Component distribution: {wcc['componentDistribution']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "writeMillis 5\n", + "nodePropertiesWritten 172\n", + "ranLevels 2\n", + "didConverge True\n", + "nodeCount 172\n", + "communityCount 22\n", + "communityDistribution {'min': 1, 'p5': 1, 'max': 64, 'p999': 64, 'p9...\n", + "modularity 0.678705\n", + "modularities [0.6566821889989846, 0.6787052274080039]\n", + "postProcessingMillis 3\n", + "preProcessingMillis 0\n", + "computeMillis 33\n", + "configuration {'writeProperty': 'communities', 'theta': 0.01...\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gds.leiden.write(\n", + " G,\n", + " writeProperty=\"communities\",\n", + " includeIntermediateCommunities=True,\n", + " relationshipWeightProperty=\"weight\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'count(*)': 344}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"graph.query(\"\"\"\n", + "MATCH (e:`__Entity__`)\n", + "UNWIND range(0, size(e.communities) - 1 , 1) AS index\n", + "CALL {\n", + " WITH e, index\n", + " WITH e, index\n", + " WHERE index = 0\n", + " MERGE (c:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])})\n", + " ON CREATE SET c.level = index\n", + " MERGE (e)-[:IN_COMMUNITY]->(c)\n", + " RETURN count(*) AS count_0\n", + "}\n", + "CALL {\n", + " WITH e, index\n", + " WITH e, index\n", + " WHERE index > 0\n", + " MERGE (current:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])})\n", + " ON CREATE SET current.level = index\n", + " MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])})\n", + " ON CREATE SET previous.level = index - 1\n", + " MERGE (previous)-[:IN_COMMUNITY]->(current)\n", + " RETURN count(*) AS count_1\n", + "}\n", + "RETURN count(*)\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"\"\"\n", + "MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:__Entity__)<-[:MENTIONS]-(d:Document)\n", + "WITH c, count(distinct d) AS rank\n", + "SET c.community_rank = rank;\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LevelNumber of communities25th Percentile50th Percentile75th Percentile90th Percentile99th PercentileMax
00281.02.03.7513.949.6661
11221.02.08.5017.854.5564
\n", + "
" + ], + "text/plain": [ + " Level Number of communities 25th Percentile 50th Percentile \\\n", + "0 0 28 1.0 2.0 \n", + "1 1 22 1.0 2.0 \n", + "\n", + " 75th Percentile 90th Percentile 99th Percentile Max \n", + "0 3.75 13.9 49.66 61 \n", + "1 8.50 17.8 54.55 64 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "community_size = graph.query(\n", + " \"\"\"\n", + "MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(e:__Entity__)\n", + "WITH c, count(distinct e) AS entities\n", + "RETURN split(c.id, '-')[0] AS level, entities\n", + "\"\"\"\n", + ")\n", + "community_size_df = pd.DataFrame.from_records(community_size)\n", + "percentiles_data = []\n", + "for level in community_size_df[\"level\"].unique():\n", + " subset = community_size_df[community_size_df[\"level\"] == level][\"entities\"]\n", + " num_communities = len(subset)\n", + " percentiles = np.percentile(subset, [25, 50, 75, 90, 99])\n", + " percentiles_data.append(\n", + " [\n", + " level,\n", + " num_communities,\n", + " percentiles[0],\n", + " percentiles[1],\n", + " percentiles[2],\n", + " percentiles[3],\n", + " percentiles[4],\n", + " max(subset)\n", + " ]\n", + " )\n", + "\n", + "# Create a DataFrame with the percentiles\n", + "percentiles_df = pd.DataFrame(\n", + " percentiles_data,\n", + " columns=[\n", + " \"Level\",\n", + " \"Number of communities\",\n", + " \"25th Percentile\",\n", + " \"50th Percentile\",\n", + " \"75th Percentile\",\n", + " \"90th Percentile\",\n", + " \"99th Percentile\",\n", + " \"Max\"\n", + " ],\n", + ")\n", + "percentiles_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### community summaries" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "community_info = graph.query(\"\"\"\n", + "MATCH 
(c:`__Community__`)<-[:IN_COMMUNITY*]-(e:__Entity__)\n", + "WHERE c.level IN [0,1,4]\n", + "WITH c, collect(e ) AS nodes\n", + "WHERE size(nodes) > 1\n", + "CALL apoc.path.subgraphAll(nodes[0], {\n", + "\twhitelistNodes:nodes\n", + "})\n", + "YIELD relationships\n", + "RETURN c.id AS communityId,\n", + " [n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes,\n", + " [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id, description: r.description}] AS rels\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'communityId': '0-36',\n", + " 'nodes': [{'id': 'India',\n", + " 'description': 'Officially the Republic of India, located in South Asia.',\n", + " 'type': 'Country'},\n", + " {'id': 'Supreme Court Of India',\n", + " 'description': 'Head of the independent judiciary.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian National Congress',\n", + " 'description': 'Dominated Indian politics until 1977.',\n", + " 'type': 'Political party'},\n", + " {'id': 'Sindhu',\n", + " 'description': 'Name of the Indus River, from which the name India is derived.',\n", + " 'type': 'River'},\n", + " {'id': 'Food And Agriculture Organization Of The United Nations',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'Organization'},\n", + " {'id': '2013 Forest Survey Of India',\n", + " 'description': \"States India's forest cover increased to 69.8 million hectares by 2012.\",\n", + " 'type': 'Report'},\n", + " {'id': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibited the cutting of trees.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Promulgated in 1840 
targeting forests in Britain's Asian colonies.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Indian Forest Act Of 1865',\n", + " 'description': \"Established the government's claims over forests.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Forest Act Of 1878',\n", + " 'description': 'Gave the British government control over all wastelands, including forests.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Ocean',\n", + " 'description': 'Ocean bounded by India on the north.',\n", + " 'type': 'Ocean'},\n", + " {'id': 'Sri Lanka',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Maldives',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Myanmar',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Himalayan Mountain Range',\n", + " 'description': 'Defines the northern frontiers of India.',\n", + " 'type': 'Mountain range'},\n", + " {'id': 'China',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bhutan',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Nepal',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Pakistan',\n", + " 'description': 'Country sharing land border with India to the west.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bangladesh',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Ganges',\n", + " 'description': 'The longest river originating in India.',\n", + " 'type': 
'River'},\n", + " {'id': 'Western Ghats',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Eastern Himalayas',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Indo-Burma Hotspot',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Forests',\n", + " 'description': 'Covers about 24.6% of the total land area.',\n", + " 'type': 'Ecosystem'},\n", + " {'id': 'Indomalayan Realm',\n", + " 'description': 'Geographical realm that includes India.',\n", + " 'type': 'Realm'},\n", + " {'id': 'World', 'description': None, 'type': None},\n", + " {'id': 'Public_Sector', 'description': None, 'type': None},\n", + " {'id': 'Gdp', 'description': None, 'type': None},\n", + " {'id': 'Economic_Liberalisation', 'description': None, 'type': None},\n", + " {'id': 'Gdp_Growth', 'description': None, 'type': None},\n", + " {'id': 'Consumer_Market', 'description': None, 'type': None},\n", + " {'id': 'Wto', 'description': None, 'type': None},\n", + " {'id': 'Ease_Of_Business_Index', 'description': None, 'type': None},\n", + " {'id': 'Global_Competitiveness_Index', 'description': None, 'type': None},\n", + " {'id': 'Billionaires', 'description': None, 'type': None},\n", + " {'id': 'Welfare_State', 'description': None, 'type': None},\n", + " {'id': 'Socialist_State', 'description': None, 'type': None},\n", + " {'id': 'Social_Welfare_Spending', 'description': None, 'type': None},\n", + " {'id': 'Labour_Force', 'description': None, 'type': None},\n", + " {'id': 'Fdi', 'description': None, 'type': None},\n", + " {'id': 'Free_Trade_Agreements', 'description': None, 'type': None},\n", + " {'id': 'Service_Sector', 'description': None, 'type': None},\n", + " {'id': 'Stock_Exchanges', 'description': None, 'type': None},\n", + " {'id': 'Manufacturing', 
'description': None, 'type': None},\n", + " {'id': 'Population', 'description': None, 'type': None},\n", + " {'id': 'Unemployment', 'description': None, 'type': None},\n", + " {'id': 'Savings_Rate', 'description': None, 'type': None},\n", + " {'id': 'Arabian Sea',\n", + " 'description': 'Sea bounded by India on the northwest.',\n", + " 'type': 'Sea'},\n", + " {'id': 'Bay Of Bengal',\n", + " 'description': 'Bay bounded by India on the southeast.',\n", + " 'type': 'Bay'},\n", + " {'id': 'Africa',\n", + " 'description': 'Continent from which modern humans arrived on the Indian subcontinent.',\n", + " 'type': 'Continent'},\n", + " {'id': 'Indus Valley Civilisation',\n", + " 'description': 'Civilisation that evolved from settled life in the western margins of the Indus river basin.',\n", + " 'type': 'Civilization'},\n", + " {'id': 'Sanskrit',\n", + " 'description': 'Indo-European language that diffused into India from the northwest.',\n", + " 'type': 'Language'},\n", + " {'id': 'Rigveda',\n", + " 'description': 'Ancient hymns recording the dawning of Hinduism in India.',\n", + " 'type': 'Text'},\n", + " {'id': 'Hinduism',\n", + " 'description': 'Major religion in India, with roots in the Rigveda.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Buddhism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Jainism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Mughal Empire',\n", + " 'description': 'Empire that began in 1526, known for its architecture and relative peace.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Indian Republic',\n", + " 'description': 'India has been a federal republic since 1950.',\n", + " 'type': 'Government'}],\n", + " 'rels': [{'start': 'India',\n", + " 'description': None,\n", + " 'type': 'IS_PART_OF',\n", + " 'end': 'Indomalayan Realm'},\n", + " 
{'start': 'India',\n", + " 'description': 'About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.',\n", + " 'type': 'HAS',\n", + " 'end': 'Public_Sector'},\n", + " {'start': 'India',\n", + " 'description': \"One of the world's highest number of billionaires.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Billionaires'},\n", + " {'start': 'India',\n", + " 'description': \"World's second-largest labour force with 586 million workers.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Labour_Force'},\n", + " {'start': 'India',\n", + " 'description': 'Has free trade agreements with several nations and blocs.',\n", + " 'type': 'HAS',\n", + " 'end': 'Free_Trade_Agreements'},\n", + " {'start': 'India',\n", + " 'description': \"Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Stock_Exchanges'},\n", + " {'start': 'India',\n", + " 'description': \"Nearly 65% of India's population is rural.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Population'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'RELATES_TO',\n", + " 'end': 'Rigveda'},\n", + " {'start': 'India',\n", + " 'description': 'Member of the World Trade Organization since 1 January 1995.',\n", + " 'type': 'MEMBER_OF',\n", + " 'end': 'Wto'},\n", + " {'start': 'India',\n", + " 'description': 'High unemployment and rising income inequality.',\n", + " 'type': 'FACES',\n", + " 'end': 'Unemployment'},\n", + " {'start': 'India',\n", + " 'description': 'Adopted broad economic liberalisation in 1991.',\n", + " 'type': 'ADOPTED',\n", + " 'end': 'Economic_Liberalisation'},\n", + " {'start': 'India',\n", + " 'description': 'Often considered a welfare state.',\n", + " 'type': 'CONSIDERED_AS',\n", + " 'end': 'Welfare_State'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Sri Lanka'},\n", + " {'start': 'India', 'description': 
None, 'type': 'NEAR', 'end': 'Maldives'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Indian Ocean'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Himalayan Mountain Range'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'China'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Bhutan'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Nepal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Pakistan'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bangladesh'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Arabian Sea'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bay Of Bengal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SEPARATED_BY',\n", + " 'end': 'Sri Lanka'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Sindhu'},\n", + " {'start': 'India',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'ESTIMATES',\n", + " 'end': 'Food And Agriculture Organization Of The United Nations'},\n", + " {'start': 'India',\n", + " 'description': 'Makes up more than 50% of GDP.',\n", + " 'type': 'MAKES_UP',\n", + " 'end': 'Service_Sector'},\n", + " {'start': 'India',\n", + " 'description': 'Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'World'},\n", + " {'start': 'India',\n", + " 'description': 'Fourth-largest consumer market in the world.',\n", + " 'type': 'RANKS_AS',\n", + " 
'end': 'Consumer_Market'},\n", + " {'start': 'India',\n", + " 'description': \"World's sixth-largest manufacturer, representing 2.6% of global manufacturing output.\",\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Manufacturing'},\n", + " {'start': 'India',\n", + " 'description': 'Officially declared a socialist state as per the constitution.',\n", + " 'type': 'DECLARED_AS',\n", + " 'end': 'Socialist_State'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Hinduism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Buddhism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Jainism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_AS',\n", + " 'end': 'Indian Republic'},\n", + " {'start': 'India',\n", + " 'description': '136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Gdp'},\n", + " {'start': 'India',\n", + " 'description': '63rd on the Ease of Doing Business index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Ease_Of_Business_Index'},\n", + " {'start': 'India',\n", + " 'description': '40th on the Global Competitiveness Index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Global_Competitiveness_Index'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': 'Overall social welfare spending stood at 8.6% of GDP in 2021-22.',\n", + " 'type': 'SPENDING',\n", + " 'end': 'Social_Welfare_Spending'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Mughal Empire'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGINATES_IN',\n", + " 'end': 'Ganges'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 
'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Western Ghats'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Eastern Himalayas'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Indo-Burma Hotspot'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_ECOSYSTEM',\n", + " 'end': 'Forests'},\n", + " {'start': 'India',\n", + " 'description': '6% to 7% since the start of the 21st century.',\n", + " 'type': 'AVERAGE_ANNUAL_GROWTH',\n", + " 'end': 'Gdp_Growth'},\n", + " {'start': 'India',\n", + " 'description': 'Foreign direct investment in 2021-22 was $82 billion.',\n", + " 'type': 'FOREIGN_DIRECT_INVESTMENT',\n", + " 'end': 'Fdi'},\n", + " {'start': 'India',\n", + " 'description': 'Gross domestic savings rate stood at 29.3% of GDP in 2022.',\n", + " 'type': 'GROSS_DOMESTIC_SAVINGS_RATE',\n", + " 'end': 'Savings_Rate'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HUMAN_ORIGIN',\n", + " 'end': 'Africa'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_DEVELOPMENT',\n", + " 'end': 'Indus Valley Civilisation'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'LANGUAGE_DIFFUSION',\n", + " 'end': 'Sanskrit'},\n", + " {'start': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'DISCUSSES',\n", + " 'end': 'India'},\n", + " {'start': 'Indian Forest Act Of 1865',\n", + " 'description': \"Establishes government's claims over forests.\",\n", + " 'type': 'ESTABLISHES',\n", + " 'end': 'India'},\n", + " {'start': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Targets forests in Britain's Asian colonies.\",\n", + " 'type': 'TARGETS',\n", + " 'end': 'India'},\n", + " {'start': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibits the cutting of 
trees.',\n", + " 'type': 'PROHIBITS',\n", + " 'end': 'India'},\n", + " {'start': '2013 Forest Survey Of India',\n", + " 'description': 'Reports on forest cover increase.',\n", + " 'type': 'REPORTS_ON',\n", + " 'end': 'India'},\n", + " {'start': 'Supreme Court Of India',\n", + " 'description': None,\n", + " 'type': 'PART_OF_JUDICIARY',\n", + " 'end': 'India'},\n", + " {'start': 'Indian National Congress',\n", + " 'description': None,\n", + " 'type': 'HISTORICALLY_DOMINANT_PARTY',\n", + " 'end': 'India'},\n", + " {'start': 'Forest Act Of 1878',\n", + " 'description': 'Gives control over all wastelands, including forests.',\n", + " 'type': 'GIVES_CONTROL',\n", + " 'end': 'India'},\n", + " {'start': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'INSPECTOR_GENERAL_OF',\n", + " 'end': 'India'}]},\n", + " {'communityId': '0-34',\n", + " 'nodes': [{'id': 'Constitution Of India',\n", + " 'description': 'The supreme law of India, laying down the framework for government institutions and fundamental rights.',\n", + " 'type': 'Document'},\n", + " {'id': 'Parliament',\n", + " 'description': 'Cannot override the Constitution of India.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly',\n", + " 'description': 'Tasked with drafting the Constitution of India.',\n", + " 'type': 'Government body'},\n", + " {'id': 'M. N. 
Roy',\n", + " 'description': 'Proposed the framework for the Constitution of India.',\n", + " 'type': 'Person'},\n", + " {'id': 'Republic Day',\n", + " 'description': 'Celebrated on 26 January in India to honor the Constitution.',\n", + " 'type': 'Event'},\n", + " {'id': 'Old Parliament House',\n", + " 'description': 'Preserves the original 1950 Constitution in a nitrogen-filled case.',\n", + " 'type': 'Location'},\n", + " {'id': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Articles Of The Constitution',\n", + " 'description': 'Articles 5, 6, 7, 8, 9, 60, 324, 366, 367, 379, 380, 388, 391, 392, 393, and 394 came into force on 26 November 1949.',\n", + " 'type': 'Document'}],\n", + " 'rels': [{'start': 'Constitution Of India',\n", + " 'description': 'The Constitution is based on the proposal suggested by M. N. Roy.',\n", + " 'type': 'BASED_ON',\n", + " 'end': 'M. N. 
Roy'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'The original 1950 Constitution is preserved in a nitrogen-filled case.',\n", + " 'type': 'PRESERVED_IN',\n", + " 'end': 'Old Parliament House'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Celebrated on 26 January.',\n", + " 'type': 'CELEBRATED_ON',\n", + " 'end': 'Republic Day'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Parliament cannot override the Constitution.',\n", + " 'type': 'CANNOT_OVERRIDE',\n", + " 'end': 'Parliament'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Certain articles came into force on 26 November 1949.',\n", + " 'type': 'CAME_INTO_FORCE',\n", + " 'end': 'Articles Of The Constitution'},\n", + " {'start': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'REPLACED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'RESPONSIBLE_FOR',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'REPEALED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Constituent Assembly',\n", + " 'description': None,\n", + " 'type': 'DRAFTED_BY',\n", + " 'end': 'Constitution Of India'}]},\n", + " {'communityId': '0-25',\n", + " 'nodes': [{'id': 'President Of India',\n", + " 'description': 'The Supreme Commander of the Indian Armed Forces.',\n", + " 'type': 'Person'},\n", + " {'id': 'Nda',\n", + " 'description': 'A coalition of the BJP and its allies governing India since 2014.',\n", + " 'type': 'Political alliance'},\n", + " {'id': 'Government Of India',\n", + " 'description': 'Established a system of national parks and protected areas.',\n", + " 'type': 'Government'},\n", + " 
{'id': 'Narendra Modi',\n", + " 'description': 'Current Prime Minister of India since 26 May 2014.',\n", + " 'type': 'Person'},\n", + " {'id': 'New Delhi',\n", + " 'description': 'The seat of the Government of India.',\n", + " 'type': 'Location'},\n", + " {'id': 'Supreme Court',\n", + " 'description': 'The highest judicial forum and final court of appeal under the Constitution of India.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian Councils Act 1909',\n", + " 'description': 'Introduced elections to the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Government Of India Act 1919',\n", + " 'description': 'Expanded the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'National Parks',\n", + " 'description': 'Established in 1935 and expanded to nearly 1022 by 2023.',\n", + " 'type': 'Protected area'},\n", + " {'id': 'Wildlife Protection Act Of 1972',\n", + " 'description': 'Enacted for the protection of wildlife.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Project Tiger',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Elephant',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Dolphin',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'}],\n", + " 'rels': [{'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Tiger'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Elephant'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Dolphin'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 
'GOVERNED_BY',\n", + " 'end': 'Nda'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ENACTED',\n", + " 'end': 'Wildlife Protection Act Of 1972'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ESTABLISHED',\n", + " 'end': 'National Parks'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'LEAD_BY',\n", + " 'end': 'Narendra Modi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Supreme Court'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'SEATED_IN',\n", + " 'end': 'New Delhi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'HEAD_OF_STATE',\n", + " 'end': 'President Of India'},\n", + " {'start': 'Indian Councils Act 1909',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Government Of India Act 1919',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'}]},\n", + " {'communityId': '0-112',\n", + " 'nodes': [{'id': 'Prime Minister Of India',\n", + " 'description': 'Holds executive authority and responsibility for national security.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Armed Forces',\n", + " 'description': 'The military forces of the Republic of India.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Army',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Navy',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Air Force',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 
'Organization'},\n", + " {'id': 'Central Armed Police Forces',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Coast Guard',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Special Frontier Force',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Strategic Forces Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Andaman And Nicobar Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Integrated Defence Staff',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Ministry Of Defence',\n", + " 'description': 'Manages the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Global Firepower Index',\n", + " 'description': 'Ranks the military forces globally.',\n", + " 'type': 'Report'},\n", + " {'id': 'Armed Forces Flag Day',\n", + " 'description': 'Honors armed forces and military personnel annually.',\n", + " 'type': 'Event'},\n", + " {'id': 'Mauryan Empire',\n", + " 'description': 'An ancient Indian empire known for its powerful military.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Chola Empire',\n", + " 'description': 'Known for foreign trade and maritime activity.',\n", + " 'type': 'Empire'}],\n", + " 'rels': [{'start': 'Prime Minister Of India',\n", + " 'description': None,\n", + " 'type': 'EXECUTIVE_AUTHORITY',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Army'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Navy'},\n", + " 
{'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Air Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Central Armed Police Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Indian Coast Guard'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Special Frontier Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Strategic Forces Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Andaman And Nicobar Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Integrated Defence Staff'},\n", + " {'start': 'Ministry Of Defence',\n", + " 'description': None,\n", + " 'type': 'MANAGES',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Global Firepower Index',\n", + " 'description': None,\n", + " 'type': 'RANKS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Armed Forces Flag Day',\n", + " 'description': None,\n", + " 'type': 'HONORS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Mauryan Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Chola Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'}]},\n", + " {'communityId': '0-27',\n", + " 'nodes': [{'id': 'Rajya Sabha',\n", + " 'description': 'The mostly indirectly elected upper house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Lok Sabha',\n", + " 'description': 'The directly elected lower house of Parliament.',\n", + " 
'type': 'Government body'},\n", + " {'id': 'Union Council Of Ministers',\n", + " 'description': 'The executive decision-making committee of the government.',\n", + " 'type': 'Government body'}],\n", + " 'rels': [{'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Rajya Sabha'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Lok Sabha'}]},\n", + " {'communityId': '0-11',\n", + " 'nodes': [{'id': 'Republic Of India',\n", + " 'description': 'Has two principal official short names, India and Bharat.',\n", + " 'type': 'Country'},\n", + " {'id': 'Hindustan',\n", + " 'description': 'Commonly used name for the Republic of India.',\n", + " 'type': 'Name'}],\n", + " 'rels': [{'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Hindustan'}]},\n", + " {'communityId': '0-15',\n", + " 'nodes': [{'id': 'Bharat',\n", + " 'description': 'Another principal official short name of the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Dushyanta',\n", + " 'description': 'Father of Bharata, associated with the name Bhāratavarṣa.',\n", + " 'type': 'Person'},\n", + " {'id': 'Mahabharata',\n", + " 'description': 'Epic associated with the name Bharata.',\n", + " 'type': 'Literature'}],\n", + " 'rels': [{'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Dushyanta'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Mahabharata'}]},\n", + " {'communityId': '0-14',\n", + " 'nodes': [{'id': 'Bhāratavarṣa',\n", + " 'description': 'Term used in the first century AD, derived from the name of the Vedic community of Bharatas.',\n", + " 'type': 'Historical term'},\n", + " {'id': 'Bharatas',\n", + " 'description': 'Mentioned in the Rigveda as one of the principal kingdoms of the 
Aryavarta.',\n", + " 'type': 'Vedic community'}],\n", + " 'rels': [{'start': 'Bhāratavarṣa',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bharatas'}]},\n", + " {'communityId': '0-39',\n", + " 'nodes': [{'id': 'Thailand',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Indonesia',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Andaman And Nicobar Islands',\n", + " 'description': 'Islands of India sharing a maritime border with Thailand, Myanmar, and Indonesia.',\n", + " 'type': 'Island'}],\n", + " 'rels': [{'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Thailand'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Indonesia'}]},\n", + " {'communityId': '0-52',\n", + " 'nodes': [{'id': 'Constituent Assembly Of India',\n", + " 'description': 'Adopted the Constitution of India on 26 November 1949.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'},\n", + " {'id': 'V. P. Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'}],\n", + " 'rels': [{'start': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'V. P. 
Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'}]},\n", + " {'communityId': '0-78',\n", + " 'nodes': [{'id': 'Fauna Species',\n", + " 'description': 'India has an estimated 92,873 species of fauna.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Insects',\n", + " 'description': 'Major category with 63,423 recorded species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Mammals Count',\n", + " 'description': '423 mammals in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Count',\n", + " 'description': '1,233 birds in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Count',\n", + " 'description': '526 reptiles in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Amphibians Count',\n", + " 'description': '342 amphibians in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Fish Count',\n", + " 'description': '3,022 fish in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Mammals',\n", + " 'description': '12.6% of mammals are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Birds',\n", + " 'description': '4.5% of birds are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Reptiles',\n", + " 'description': '45.8% of reptiles are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Amphibians',\n", + " 'description': '55.8% of amphibians are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Large Animals',\n", + " 'description': 'Includes Indian elephant, Indian rhinoceros, and Gaur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Big Cats',\n", + " 'description': 'Includes tiger and lion.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Cat Family',\n", + " 'description': 'Includes Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded leopard.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Representative Species',\n", + " 'description': 'Includes 
blackbuck, nilgai, bharal, barasingha, Nilgiri tahr, and Nilgiri langur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Aquatic Mammals',\n", + " 'description': 'Includes dolphins, whales, porpoises, and dugong.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Species',\n", + " 'description': 'Includes gharial and saltwater crocodiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Species',\n", + " 'description': 'Includes peafowl, pheasants, geese, ducks, mynas, parakeets, pigeons, cranes, hornbills, and sunbirds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Bird Species',\n", + " 'description': 'Includes great Indian hornbill, great Indian bustard, nicobar pigeon, ruddy shelduck, Himalayan monal, and Himalayan quail.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Insects'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Mammals Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Amphibians Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fish Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Birds'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Reptiles'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + 
" 'end': 'Endemic Amphibians'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Large Animals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Big Cats'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Cat Family'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Representative Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Aquatic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Bird Species'}]},\n", + " {'communityId': '0-110',\n", + " 'nodes': [{'id': 'Plant Species',\n", + " 'description': 'About 29,015 species of plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flowering Plants Count',\n", + " 'description': '17,926 species of flowering plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Endemic Plant Species',\n", + " 'description': '6,842 species are endemic to India.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Algae', 'description': '7,244 species.', 'type': 'Flora'},\n", + " {'id': 'Bryophytes', 'description': '2,504 species.', 'type': 'Flora'},\n", + " {'id': 'Pteridophytes', 'description': '1,267 species.', 'type': 'Flora'},\n", + " {'id': 'Gymnosperms', 'description': '74 species.', 'type': 'Flora'},\n", + " {'id': 'Fungal Diversity',\n", + " 'description': 'Over 27,000 recorded species.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flora', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Flora',\n", + 
" 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Plant Species'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Flowering Plants Count'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Plant Species'},\n", + " {'start': 'Flora', 'description': None, 'type': 'INCLUDES', 'end': 'Algae'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Bryophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Pteridophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Gymnosperms'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fungal Diversity'}]},\n", + " {'communityId': '0-105',\n", + " 'nodes': [{'id': 'Threatened Species',\n", + " 'description': '172 IUCN-designated threatened species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Mammals',\n", + " 'description': '39 species of mammals.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Birds',\n", + " 'description': '72 species of birds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Reptiles',\n", + " 'description': '17 species of reptiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Amphibians',\n", + " 'description': '3 species of amphibians.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Fish',\n", + " 'description': '2 species of fish.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Mammals'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Birds'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 
'end': 'Threatened Reptiles'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Amphibians'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Fish'}]},\n", + " {'communityId': '0-125',\n", + " 'nodes': [{'id': 'Department Of Defence Production',\n", + " 'description': 'Responsible for indigenous production of equipment for the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Make In India Initiative',\n", + " 'description': 'Seeks to indigenise manufacturing and reduce dependence on imports for defence.',\n", + " 'type': 'Program'}],\n", + " 'rels': [{'start': 'Make In India Initiative',\n", + " 'description': None,\n", + " 'type': 'SUPPORTS',\n", + " 'end': 'Department Of Defence Production'}]},\n", + " {'communityId': '0-169',\n", + " 'nodes': [{'id': 'Maurya Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Gupta Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Ganges Basin', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Maurya Empire'},\n", + " {'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Gupta Empire'}]},\n", + " {'communityId': '0-170',\n", + " 'nodes': [{'id': 'Vijayanagara Empire',\n", + " 'description': 'Empire in south India that created a composite Hindu culture.',\n", + " 'type': 'Empire'},\n", + " {'id': 'South India', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'South India',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Vijayanagara Empire'}]},\n", + " {'communityId': '0-171',\n", + " 'nodes': [{'id': 'Sikhism',\n", + " 'description': 
'Religion that emerged in the Punjab, rejecting institutionalised religion.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Punjab', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Punjab',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Sikhism'}]},\n", + " {'communityId': '0-167',\n", + " 'nodes': [{'id': 'British East India Company',\n", + " 'description': 'Company that expanded rule in India, turning it into a colonial economy.',\n", + " 'type': 'Company'},\n", + " {'id': 'British Crown',\n", + " 'description': 'Began rule in India in 1858.',\n", + " 'type': 'Government'},\n", + " {'id': 'Indian Independence',\n", + " 'description': 'Event in 1947 when India was partitioned into two independent dominions.',\n", + " 'type': 'Event'}],\n", + " 'rels': [{'start': 'British East India Company',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'British Crown'},\n", + " {'start': 'British Crown',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Indian Independence'}]},\n", + " {'communityId': '1-23',\n", + " 'nodes': [{'id': 'India',\n", + " 'description': 'Officially the Republic of India, located in South Asia.',\n", + " 'type': 'Country'},\n", + " {'id': 'Supreme Court Of India',\n", + " 'description': 'Head of the independent judiciary.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian National Congress',\n", + " 'description': 'Dominated Indian politics until 1977.',\n", + " 'type': 'Political party'},\n", + " {'id': 'Sindhu',\n", + " 'description': 'Name of the Indus River, from which the name India is derived.',\n", + " 'type': 'River'},\n", + " {'id': 'Food And Agriculture Organization Of The United Nations',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'Organization'},\n", + " {'id': '2013 Forest Survey Of India',\n", + " 'description': \"States India's forest cover increased to 69.8 million hectares by 2012.\",\n", + " 
'type': 'Report'},\n", + " {'id': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibited the cutting of trees.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Promulgated in 1840 targeting forests in Britain's Asian colonies.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Indian Forest Act Of 1865',\n", + " 'description': \"Established the government's claims over forests.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Forest Act Of 1878',\n", + " 'description': 'Gave the British government control over all wastelands, including forests.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Ocean',\n", + " 'description': 'Ocean bounded by India on the north.',\n", + " 'type': 'Ocean'},\n", + " {'id': 'Sri Lanka',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Maldives',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Myanmar',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Himalayan Mountain Range',\n", + " 'description': 'Defines the northern frontiers of India.',\n", + " 'type': 'Mountain range'},\n", + " {'id': 'China',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bhutan',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Nepal',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 
'Country'},\n", + " {'id': 'Pakistan',\n", + " 'description': 'Country sharing land border with India to the west.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bangladesh',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Ganges',\n", + " 'description': 'The longest river originating in India.',\n", + " 'type': 'River'},\n", + " {'id': 'Western Ghats',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Eastern Himalayas',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Indo-Burma Hotspot',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Forests',\n", + " 'description': 'Covers about 24.6% of the total land area.',\n", + " 'type': 'Ecosystem'},\n", + " {'id': 'Indomalayan Realm',\n", + " 'description': 'Geographical realm that includes India.',\n", + " 'type': 'Realm'},\n", + " {'id': 'World', 'description': None, 'type': None},\n", + " {'id': 'Public_Sector', 'description': None, 'type': None},\n", + " {'id': 'Gdp', 'description': None, 'type': None},\n", + " {'id': 'Economic_Liberalisation', 'description': None, 'type': None},\n", + " {'id': 'Gdp_Growth', 'description': None, 'type': None},\n", + " {'id': 'Consumer_Market', 'description': None, 'type': None},\n", + " {'id': 'Wto', 'description': None, 'type': None},\n", + " {'id': 'Ease_Of_Business_Index', 'description': None, 'type': None},\n", + " {'id': 'Global_Competitiveness_Index', 'description': None, 'type': None},\n", + " {'id': 'Billionaires', 'description': None, 'type': None},\n", + " {'id': 'Welfare_State', 'description': None, 'type': None},\n", + " {'id': 'Socialist_State', 'description': None, 'type': None},\n", + " {'id': 'Social_Welfare_Spending', 'description': None, 'type': 
None},\n", + " {'id': 'Labour_Force', 'description': None, 'type': None},\n", + " {'id': 'Fdi', 'description': None, 'type': None},\n", + " {'id': 'Free_Trade_Agreements', 'description': None, 'type': None},\n", + " {'id': 'Service_Sector', 'description': None, 'type': None},\n", + " {'id': 'Stock_Exchanges', 'description': None, 'type': None},\n", + " {'id': 'Manufacturing', 'description': None, 'type': None},\n", + " {'id': 'Population', 'description': None, 'type': None},\n", + " {'id': 'Unemployment', 'description': None, 'type': None},\n", + " {'id': 'Savings_Rate', 'description': None, 'type': None},\n", + " {'id': 'Arabian Sea',\n", + " 'description': 'Sea bounded by India on the northwest.',\n", + " 'type': 'Sea'},\n", + " {'id': 'Bay Of Bengal',\n", + " 'description': 'Bay bounded by India on the southeast.',\n", + " 'type': 'Bay'},\n", + " {'id': 'Africa',\n", + " 'description': 'Continent from which modern humans arrived on the Indian subcontinent.',\n", + " 'type': 'Continent'},\n", + " {'id': 'Indus Valley Civilisation',\n", + " 'description': 'Civilisation that evolved from settled life in the western margins of the Indus river basin.',\n", + " 'type': 'Civilization'},\n", + " {'id': 'Sanskrit',\n", + " 'description': 'Indo-European language that diffused into India from the northwest.',\n", + " 'type': 'Language'},\n", + " {'id': 'Rigveda',\n", + " 'description': 'Ancient hymns recording the dawning of Hinduism in India.',\n", + " 'type': 'Text'},\n", + " {'id': 'Hinduism',\n", + " 'description': 'Major religion in India, with roots in the Rigveda.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Buddhism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Jainism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Mughal Empire',\n", + " 'description': 
'Empire that began in 1526, known for its architecture and relative peace.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Indian Republic',\n", + " 'description': 'India has been a federal republic since 1950.',\n", + " 'type': 'Government'},\n", + " {'id': 'Thailand',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Indonesia',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Andaman And Nicobar Islands',\n", + " 'description': 'Islands of India sharing a maritime border with Thailand, Myanmar, and Indonesia.',\n", + " 'type': 'Island'}],\n", + " 'rels': [{'start': 'India',\n", + " 'description': None,\n", + " 'type': 'IS_PART_OF',\n", + " 'end': 'Indomalayan Realm'},\n", + " {'start': 'India',\n", + " 'description': 'About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.',\n", + " 'type': 'HAS',\n", + " 'end': 'Public_Sector'},\n", + " {'start': 'India',\n", + " 'description': \"One of the world's highest number of billionaires.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Billionaires'},\n", + " {'start': 'India',\n", + " 'description': \"World's second-largest labour force with 586 million workers.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Labour_Force'},\n", + " {'start': 'India',\n", + " 'description': 'Has free trade agreements with several nations and blocs.',\n", + " 'type': 'HAS',\n", + " 'end': 'Free_Trade_Agreements'},\n", + " {'start': 'India',\n", + " 'description': \"Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Stock_Exchanges'},\n", + " {'start': 'India',\n", + " 'description': \"Nearly 65% of India's population is rural.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Population'},\n", + " {'start': 
'India',\n", + " 'description': None,\n", + " 'type': 'RELATES_TO',\n", + " 'end': 'Rigveda'},\n", + " {'start': 'India',\n", + " 'description': 'Member of the World Trade Organization since 1 January 1995.',\n", + " 'type': 'MEMBER_OF',\n", + " 'end': 'Wto'},\n", + " {'start': 'India',\n", + " 'description': 'High unemployment and rising income inequality.',\n", + " 'type': 'FACES',\n", + " 'end': 'Unemployment'},\n", + " {'start': 'India',\n", + " 'description': 'Adopted broad economic liberalisation in 1991.',\n", + " 'type': 'ADOPTED',\n", + " 'end': 'Economic_Liberalisation'},\n", + " {'start': 'India',\n", + " 'description': 'Often considered a welfare state.',\n", + " 'type': 'CONSIDERED_AS',\n", + " 'end': 'Welfare_State'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Sri Lanka'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Maldives'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Indian Ocean'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Himalayan Mountain Range'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'China'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Bhutan'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Nepal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Pakistan'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bangladesh'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Arabian Sea'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bay Of Bengal'},\n", + " {'start': 
'India',\n", + " 'description': None,\n", + " 'type': 'SEPARATED_BY',\n", + " 'end': 'Sri Lanka'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Sindhu'},\n", + " {'start': 'India',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'ESTIMATES',\n", + " 'end': 'Food And Agriculture Organization Of The United Nations'},\n", + " {'start': 'India',\n", + " 'description': 'Makes up more than 50% of GDP.',\n", + " 'type': 'MAKES_UP',\n", + " 'end': 'Service_Sector'},\n", + " {'start': 'India',\n", + " 'description': 'Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'World'},\n", + " {'start': 'India',\n", + " 'description': 'Fourth-largest consumer market in the world.',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Consumer_Market'},\n", + " {'start': 'India',\n", + " 'description': \"World's sixth-largest manufacturer, representing 2.6% of global manufacturing output.\",\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Manufacturing'},\n", + " {'start': 'India',\n", + " 'description': 'Officially declared a socialist state as per the constitution.',\n", + " 'type': 'DECLARED_AS',\n", + " 'end': 'Socialist_State'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Hinduism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Buddhism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Jainism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_AS',\n", + " 'end': 'Indian Republic'},\n", + " {'start': 'India',\n", + " 'description': '136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Gdp'},\n", + " {'start': 'India',\n", + " 'description': '63rd on the Ease of 
Doing Business index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Ease_Of_Business_Index'},\n", + " {'start': 'India',\n", + " 'description': '40th on the Global Competitiveness Index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Global_Competitiveness_Index'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Thailand'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Indonesia'},\n", + " {'start': 'India',\n", + " 'description': 'Overall social welfare spending stood at 8.6% of GDP in 2021-22.',\n", + " 'type': 'SPENDING',\n", + " 'end': 'Social_Welfare_Spending'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Mughal Empire'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGINATES_IN',\n", + " 'end': 'Ganges'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Western Ghats'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Eastern Himalayas'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Indo-Burma Hotspot'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_ECOSYSTEM',\n", + " 'end': 'Forests'},\n", + " {'start': 'India',\n", + " 'description': '6% to 7% since the start of the 21st century.',\n", + " 'type': 'AVERAGE_ANNUAL_GROWTH',\n", + " 'end': 'Gdp_Growth'},\n", + " {'start': 'India',\n", + " 'description': 'Foreign direct investment in 2021-22 was $82 billion.',\n", + " 'type': 'FOREIGN_DIRECT_INVESTMENT',\n", + " 'end': 'Fdi'},\n", + " {'start': 'India',\n", + " 'description': 'Gross domestic savings 
rate stood at 29.3% of GDP in 2022.',\n", + " 'type': 'GROSS_DOMESTIC_SAVINGS_RATE',\n", + " 'end': 'Savings_Rate'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HUMAN_ORIGIN',\n", + " 'end': 'Africa'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_DEVELOPMENT',\n", + " 'end': 'Indus Valley Civilisation'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'LANGUAGE_DIFFUSION',\n", + " 'end': 'Sanskrit'},\n", + " {'start': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'DISCUSSES',\n", + " 'end': 'India'},\n", + " {'start': 'Indian Forest Act Of 1865',\n", + " 'description': \"Establishes government's claims over forests.\",\n", + " 'type': 'ESTABLISHES',\n", + " 'end': 'India'},\n", + " {'start': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Targets forests in Britain's Asian colonies.\",\n", + " 'type': 'TARGETS',\n", + " 'end': 'India'},\n", + " {'start': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibits the cutting of trees.',\n", + " 'type': 'PROHIBITS',\n", + " 'end': 'India'},\n", + " {'start': '2013 Forest Survey Of India',\n", + " 'description': 'Reports on forest cover increase.',\n", + " 'type': 'REPORTS_ON',\n", + " 'end': 'India'},\n", + " {'start': 'Supreme Court Of India',\n", + " 'description': None,\n", + " 'type': 'PART_OF_JUDICIARY',\n", + " 'end': 'India'},\n", + " {'start': 'Indian National Congress',\n", + " 'description': None,\n", + " 'type': 'HISTORICALLY_DOMINANT_PARTY',\n", + " 'end': 'India'},\n", + " {'start': 'Forest Act Of 1878',\n", + " 'description': 'Gives control over all wastelands, including forests.',\n", + " 'type': 'GIVES_CONTROL',\n", + " 'end': 'India'},\n", + " {'start': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'INSPECTOR_GENERAL_OF',\n", + " 'end': 'India'},\n", + " 
{'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Thailand'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Indonesia'}]},\n", + " {'communityId': '1-7',\n", + " 'nodes': [{'id': 'Constitution Of India',\n", + " 'description': 'The supreme law of India, laying down the framework for government institutions and fundamental rights.',\n", + " 'type': 'Document'},\n", + " {'id': 'Parliament',\n", + " 'description': 'Cannot override the Constitution of India.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly',\n", + " 'description': 'Tasked with drafting the Constitution of India.',\n", + " 'type': 'Government body'},\n", + " {'id': 'M. N. 
Roy',\n", + " 'description': 'Proposed the framework for the Constitution of India.',\n", + " 'type': 'Person'},\n", + " {'id': 'Republic Day',\n", + " 'description': 'Celebrated on 26 January in India to honor the Constitution.',\n", + " 'type': 'Event'},\n", + " {'id': 'Old Parliament House',\n", + " 'description': 'Preserves the original 1950 Constitution in a nitrogen-filled case.',\n", + " 'type': 'Location'},\n", + " {'id': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Articles Of The Constitution',\n", + " 'description': 'Articles 5, 6, 7, 8, 9, 60, 324, 366, 367, 379, 380, 388, 391, 392, 393, and 394 came into force on 26 November 1949.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly Of India',\n", + " 'description': 'Adopted the Constitution of India on 26 November 1949.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'},\n", + " {'id': 'V. P. Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'}],\n", + " 'rels': [{'start': 'Constitution Of India',\n", + " 'description': 'The Constitution is based on the proposal suggested by M. N. Roy.',\n", + " 'type': 'BASED_ON',\n", + " 'end': 'M. N. 
Roy'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Adopted by the Constituent Assembly on 26 November 1949.',\n", + " 'type': 'ADOPTED_BY',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'The original 1950 Constitution is preserved in a nitrogen-filled case.',\n", + " 'type': 'PRESERVED_IN',\n", + " 'end': 'Old Parliament House'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Celebrated on 26 January.',\n", + " 'type': 'CELEBRATED_ON',\n", + " 'end': 'Republic Day'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Parliament cannot override the Constitution.',\n", + " 'type': 'CANNOT_OVERRIDE',\n", + " 'end': 'Parliament'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Certain articles came into force on 26 November 1949.',\n", + " 'type': 'CAME_INTO_FORCE',\n", + " 'end': 'Articles Of The Constitution'},\n", + " {'start': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'REPLACED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'RESPONSIBLE_FOR',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'REPEALED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Constituent Assembly',\n", + " 'description': None,\n", + " 'type': 'DRAFTED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'V. P. 
Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'}]},\n", + " {'communityId': '1-4',\n", + " 'nodes': [{'id': 'President Of India',\n", + " 'description': 'The Supreme Commander of the Indian Armed Forces.',\n", + " 'type': 'Person'},\n", + " {'id': 'Nda',\n", + " 'description': 'A coalition of the BJP and its allies governing India since 2014.',\n", + " 'type': 'Political alliance'},\n", + " {'id': 'Government Of India',\n", + " 'description': 'Established a system of national parks and protected areas.',\n", + " 'type': 'Government'},\n", + " {'id': 'Narendra Modi',\n", + " 'description': 'Current Prime Minister of India since 26 May 2014.',\n", + " 'type': 'Person'},\n", + " {'id': 'New Delhi',\n", + " 'description': 'The seat of the Government of India.',\n", + " 'type': 'Location'},\n", + " {'id': 'Supreme Court',\n", + " 'description': 'The highest judicial forum and final court of appeal under the Constitution of India.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian Councils Act 1909',\n", + " 'description': 'Introduced elections to the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Government Of India Act 1919',\n", + " 'description': 'Expanded the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'National Parks',\n", + " 'description': 'Established in 1935 and expanded to nearly 1022 by 2023.',\n", + " 'type': 'Protected area'},\n", + " {'id': 'Wildlife Protection Act Of 1972',\n", + " 'description': 'Enacted for the protection of wildlife.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Project Tiger',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Elephant',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation 
project'},\n", + " {'id': 'Project Dolphin',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Rajya Sabha',\n", + " 'description': 'The mostly indirectly elected upper house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Lok Sabha',\n", + " 'description': 'The directly elected lower house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Union Council Of Ministers',\n", + " 'description': 'The executive decision-making committee of the government.',\n", + " 'type': 'Government body'}],\n", + " 'rels': [{'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Tiger'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Elephant'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Dolphin'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_BY',\n", + " 'end': 'Nda'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'COMPOSED_OF',\n", + " 'end': 'Union Council Of Ministers'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ENACTED',\n", + " 'end': 'Wildlife Protection Act Of 1972'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ESTABLISHED',\n", + " 'end': 'National Parks'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'LEAD_BY',\n", + " 'end': 'Narendra Modi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Supreme Court'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'SEATED_IN',\n", + " 'end': 'New Delhi'},\n", + " {'start': 
'Government Of India',\n", + " 'description': None,\n", + " 'type': 'HEAD_OF_STATE',\n", + " 'end': 'President Of India'},\n", + " {'start': 'Indian Councils Act 1909',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Government Of India Act 1919',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Rajya Sabha'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Lok Sabha'}]},\n", + " {'communityId': '1-22',\n", + " 'nodes': [{'id': 'Prime Minister Of India',\n", + " 'description': 'Holds executive authority and responsibility for national security.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Armed Forces',\n", + " 'description': 'The military forces of the Republic of India.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Army',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Navy',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Air Force',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Central Armed Police Forces',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Coast Guard',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Special Frontier Force',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Strategic Forces Command',\n", 
+ " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Andaman And Nicobar Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Integrated Defence Staff',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Ministry Of Defence',\n", + " 'description': 'Manages the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Global Firepower Index',\n", + " 'description': 'Ranks the military forces globally.',\n", + " 'type': 'Report'},\n", + " {'id': 'Armed Forces Flag Day',\n", + " 'description': 'Honors armed forces and military personnel annually.',\n", + " 'type': 'Event'},\n", + " {'id': 'Mauryan Empire',\n", + " 'description': 'An ancient Indian empire known for its powerful military.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Chola Empire',\n", + " 'description': 'Known for foreign trade and maritime activity.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Department Of Defence Production',\n", + " 'description': 'Responsible for indigenous production of equipment for the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Make In India Initiative',\n", + " 'description': 'Seeks to indigenise manufacturing and reduce dependence on imports for defence.',\n", + " 'type': 'Program'}],\n", + " 'rels': [{'start': 'Prime Minister Of India',\n", + " 'description': None,\n", + " 'type': 'EXECUTIVE_AUTHORITY',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Army'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Navy'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 
'end': 'Indian Air Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Central Armed Police Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Indian Coast Guard'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Special Frontier Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Strategic Forces Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Andaman And Nicobar Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Integrated Defence Staff'},\n", + " {'start': 'Ministry Of Defence',\n", + " 'description': None,\n", + " 'type': 'MANAGES',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Global Firepower Index',\n", + " 'description': None,\n", + " 'type': 'RANKS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Armed Forces Flag Day',\n", + " 'description': None,\n", + " 'type': 'HONORS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Department Of Defence Production',\n", + " 'description': None,\n", + " 'type': 'PRODUCES_EQUIPMENT_FOR',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Mauryan Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Chola Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Make In India Initiative',\n", + " 'description': None,\n", + " 'type': 'SUPPORTS',\n", + " 'end': 'Department Of Defence Production'}]},\n", + " {'communityId': '1-2',\n", + " 'nodes': [{'id': 'Republic Of India',\n", 
+ " 'description': 'Has two principal official short names, India and Bharat.',\n", + " 'type': 'Country'},\n", + " {'id': 'Hindustan',\n", + " 'description': 'Commonly used name for the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Bharat',\n", + " 'description': 'Another principal official short name of the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Dushyanta',\n", + " 'description': 'Father of Bharata, associated with the name Bhāratavarṣa.',\n", + " 'type': 'Person'},\n", + " {'id': 'Mahabharata',\n", + " 'description': 'Epic associated with the name Bharata.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Bhāratavarṣa',\n", + " 'description': 'Term used in the first century AD, derived from the name of the Vedic community of Bharatas.',\n", + " 'type': 'Historical term'},\n", + " {'id': 'Bharatas',\n", + " 'description': 'Mentioned in the Rigveda as one of the principal kingdoms of the Aryavarta.',\n", + " 'type': 'Vedic community'}],\n", + " 'rels': [{'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Bharat'},\n", + " {'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Hindustan'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Dushyanta'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Mahabharata'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bhāratavarṣa'},\n", + " {'start': 'Bhāratavarṣa',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bharatas'}]},\n", + " {'communityId': '1-18',\n", + " 'nodes': [{'id': 'Fauna Species',\n", + " 'description': 'India has an estimated 92,873 species of fauna.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Insects',\n", + " 'description': 'Major category with 63,423 recorded 
species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Mammals Count',\n", + " 'description': '423 mammals in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Count',\n", + " 'description': '1,233 birds in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Count',\n", + " 'description': '526 reptiles in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Amphibians Count',\n", + " 'description': '342 amphibians in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Fish Count',\n", + " 'description': '3,022 fish in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Mammals',\n", + " 'description': '12.6% of mammals are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Birds',\n", + " 'description': '4.5% of birds are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Reptiles',\n", + " 'description': '45.8% of reptiles are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Amphibians',\n", + " 'description': '55.8% of amphibians are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Large Animals',\n", + " 'description': 'Includes Indian elephant, Indian rhinoceros, and Gaur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Big Cats',\n", + " 'description': 'Includes tiger and lion.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Cat Family',\n", + " 'description': 'Includes Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded leopard.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Representative Species',\n", + " 'description': 'Includes blackbuck, nilgai, bharal, barasingha, Nilgiri tahr, and Nilgiri langur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Aquatic Mammals',\n", + " 'description': 'Includes dolphins, whales, porpoises, and dugong.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Species',\n", + " 'description': 'Includes gharial and saltwater crocodiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Species',\n", + " 'description': 'Includes peafowl, 
pheasants, geese, ducks, mynas, parakeets, pigeons, cranes, hornbills, and sunbirds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Bird Species',\n", + " 'description': 'Includes great Indian hornbill, great Indian bustard, nicobar pigeon, ruddy shelduck, Himalayan monal, and Himalayan quail.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Insects'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Mammals Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Amphibians Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fish Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Birds'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Reptiles'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Amphibians'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Large Animals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Big Cats'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Cat Family'},\n", + " {'start': 'Fauna Species',\n", + 
" 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Representative Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Aquatic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Bird Species'}]},\n", + " {'communityId': '1-20',\n", + " 'nodes': [{'id': 'Plant Species',\n", + " 'description': 'About 29,015 species of plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flowering Plants Count',\n", + " 'description': '17,926 species of flowering plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Endemic Plant Species',\n", + " 'description': '6,842 species are endemic to India.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Algae', 'description': '7,244 species.', 'type': 'Flora'},\n", + " {'id': 'Bryophytes', 'description': '2,504 species.', 'type': 'Flora'},\n", + " {'id': 'Pteridophytes', 'description': '1,267 species.', 'type': 'Flora'},\n", + " {'id': 'Gymnosperms', 'description': '74 species.', 'type': 'Flora'},\n", + " {'id': 'Fungal Diversity',\n", + " 'description': 'Over 27,000 recorded species.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flora', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Plant Species'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Flowering Plants Count'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Plant Species'},\n", + " {'start': 'Flora', 'description': None, 'type': 'INCLUDES', 'end': 'Algae'},\n", + " {'start': 
'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Bryophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Pteridophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Gymnosperms'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fungal Diversity'}]},\n", + " {'communityId': '1-19',\n", + " 'nodes': [{'id': 'Threatened Species',\n", + " 'description': '172 IUCN-designated threatened species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Mammals',\n", + " 'description': '39 species of mammals.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Birds',\n", + " 'description': '72 species of birds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Reptiles',\n", + " 'description': '17 species of reptiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Amphibians',\n", + " 'description': '3 species of amphibians.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Fish',\n", + " 'description': '2 species of fish.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Mammals'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Birds'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Reptiles'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Amphibians'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Fish'}]},\n", + " {'communityId': '1-25',\n", + " 'nodes': [{'id': 'Maurya Empire',\n", + " 'description': 'Ancient empire based in the Ganges 
Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Gupta Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Ganges Basin', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Maurya Empire'},\n", + " {'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Gupta Empire'}]},\n", + " {'communityId': '1-26',\n", + " 'nodes': [{'id': 'Vijayanagara Empire',\n", + " 'description': 'Empire in south India that created a composite Hindu culture.',\n", + " 'type': 'Empire'},\n", + " {'id': 'South India', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'South India',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Vijayanagara Empire'}]},\n", + " {'communityId': '1-27',\n", + " 'nodes': [{'id': 'Sikhism',\n", + " 'description': 'Religion that emerged in the Punjab, rejecting institutionalised religion.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Punjab', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Punjab',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Sikhism'}]},\n", + " {'communityId': '1-24',\n", + " 'nodes': [{'id': 'British East India Company',\n", + " 'description': 'Company that expanded rule in India, turning it into a colonial economy.',\n", + " 'type': 'Company'},\n", + " {'id': 'British Crown',\n", + " 'description': 'Began rule in India in 1858.',\n", + " 'type': 'Government'},\n", + " {'id': 'Indian Independence',\n", + " 'description': 'Event in 1947 when India was partitioned into two independent dominions.',\n", + " 'type': 'Event'}],\n", + " 'rels': [{'start': 'British East India Company',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'British Crown'},\n", + " {'start': 'British Crown',\n", + " 'description': None,\n", + " 
'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Indian Independence'}]}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "community_info" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "community_template = \"\"\"Based on the provided nodes and relationships that belong to the same graph community,\n", + "generate a natural language summary of the provided information:\n", + "{community_info}\n", + "\n", + "Summary:\"\"\" \n", + "model = \"openai-gpt-4o\"\n", + "llm, model_name = get_llm(model)\n", + "community_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Given an input triples, generate the information summary. No pre-amble.\",\n", + " ),\n", + " (\"human\", community_template),\n", + " ]\n", + ")\n", + "\n", + "community_chain = community_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def prepare_string(data):\n", + " nodes_str = \"Nodes are:\\n\"\n", + " for node in data['nodes']:\n", + " node_id = node['id']\n", + " node_type = node['type']\n", + " if 'description' in node and node['description']:\n", + " node_description = f\", description: {node['description']}\"\n", + " else:\n", + " node_description = \"\"\n", + " nodes_str += f\"id: {node_id}, type: {node_type}{node_description}\\n\"\n", + "\n", + " rels_str = \"Relationships are:\\n\"\n", + " for rel in data['rels']:\n", + " start = rel['start']\n", + " end = rel['end']\n", + " rel_type = rel['type']\n", + " if 'description' in rel and rel['description']:\n", + " description = f\", description: {rel['description']}\"\n", + " else:\n", + " description = \"\"\n", + " rels_str += f\"({start})-[:{rel_type}]->({end}){description}\\n\"\n", + "\n", + " return nodes_str + \"\\n\" + rels_str\n", + "\n", + "def process_community(community):\n", + 
" stringify_info = prepare_string(community)\n", + " summary = community_chain.invoke({'community_info': stringify_info})\n", + " return {\"community\": community['communityId'], \"summary\": summary}" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes are:\n", + "id: India, type: Country, description: Officially the Republic of India, located in South Asia.\n", + "id: Supreme Court Of India, type: Judiciary, description: Head of the independent judiciary.\n", + "id: Indian National Congress, type: Political party, description: Dominated Indian politics until 1977.\n", + "id: Sindhu, type: River, description: Name of the Indus River, from which the name India is derived.\n", + "id: Food And Agriculture Organization Of The United Nations, type: Organization, description: Estimates India's forest cover.\n", + "id: 2013 Forest Survey Of India, type: Report, description: States India's forest cover increased to 69.8 million hectares by 2012.\n", + "id: Yajnavalkya Smriti, type: Literature, description: Prohibited the cutting of trees.\n", + "id: Kautalyas Arthashastra, type: Literature, description: Discusses the need for forest administration.\n", + "id: Crown Land (Encroachment) Ordinance, type: Legislation, description: Promulgated in 1840 targeting forests in Britain's Asian colonies.\n", + "id: Indian Forest Act Of 1865, type: Legislation, description: Established the government's claims over forests.\n", + "id: Forest Act Of 1878, type: Legislation, description: Gave the British government control over all wastelands, including forests.\n", + "id: Sir Dietrich Brandis, type: Person, description: Inspector General of Forests in India from 1864 to 1883.\n", + "id: Indian Ocean, type: Ocean, description: Ocean bounded by India on the north.\n", + "id: Sri Lanka, type: Country, description: Country in the vicinity of India in the Indian Ocean.\n", + "id: 
Maldives, type: Country, description: Country in the vicinity of India in the Indian Ocean.\n", + "id: Myanmar, type: Country, description: Country sharing land border with India to the east.\n", + "id: Himalayan Mountain Range, type: Mountain range, description: Defines the northern frontiers of India.\n", + "id: China, type: Country, description: Country sharing land border with India to the north.\n", + "id: Bhutan, type: Country, description: Country sharing land border with India to the north.\n", + "id: Nepal, type: Country, description: Country sharing land border with India to the north.\n", + "id: Pakistan, type: Country, description: Country sharing land border with India to the west.\n", + "id: Bangladesh, type: Country, description: Country sharing land border with India to the east.\n", + "id: Ganges, type: River, description: The longest river originating in India.\n", + "id: Western Ghats, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Eastern Himalayas, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Indo-Burma Hotspot, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Forests, type: Ecosystem, description: Covers about 24.6% of the total land area.\n", + "id: Indomalayan Realm, type: Realm, description: Geographical realm that includes India.\n", + "id: World, type: None\n", + "id: Public_Sector, type: None\n", + "id: Gdp, type: None\n", + "id: Economic_Liberalisation, type: None\n", + "id: Gdp_Growth, type: None\n", + "id: Consumer_Market, type: None\n", + "id: Wto, type: None\n", + "id: Ease_Of_Business_Index, type: None\n", + "id: Global_Competitiveness_Index, type: None\n", + "id: Billionaires, type: None\n", + "id: Welfare_State, type: None\n", + "id: Socialist_State, type: None\n", + "id: Social_Welfare_Spending, type: None\n", + "id: Labour_Force, type: None\n", + "id: Fdi, type: 
None\n", + "id: Free_Trade_Agreements, type: None\n", + "id: Service_Sector, type: None\n", + "id: Stock_Exchanges, type: None\n", + "id: Manufacturing, type: None\n", + "id: Population, type: None\n", + "id: Unemployment, type: None\n", + "id: Savings_Rate, type: None\n", + "id: Arabian Sea, type: Sea, description: Sea bounded by India on the northwest.\n", + "id: Bay Of Bengal, type: Bay, description: Bay bounded by India on the southeast.\n", + "id: Africa, type: Continent, description: Continent from which modern humans arrived on the Indian subcontinent.\n", + "id: Indus Valley Civilisation, type: Civilization, description: Civilisation that evolved from settled life in the western margins of the Indus river basin.\n", + "id: Sanskrit, type: Language, description: Indo-European language that diffused into India from the northwest.\n", + "id: Rigveda, type: Text, description: Ancient hymns recording the dawning of Hinduism in India.\n", + "id: Hinduism, type: Religion, description: Major religion in India, with roots in the Rigveda.\n", + "id: Buddhism, type: Religion, description: Religion that arose in India, proclaiming social orders unlinked to heredity.\n", + "id: Jainism, type: Religion, description: Religion that arose in India, proclaiming social orders unlinked to heredity.\n", + "id: Mughal Empire, type: Empire, description: Empire that began in 1526, known for its architecture and relative peace.\n", + "id: Indian Republic, type: Government, description: India has been a federal republic since 1950.\n", + "\n", + "Relationships are:\n", + "(India)-[:IS_PART_OF]->(Indomalayan Realm)\n", + "(India)-[:HAS]->(Public_Sector), description: About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.\n", + "(India)-[:HAS]->(Billionaires), description: One of the world's highest number of billionaires.\n", + "(India)-[:HAS]->(Labour_Force), description: World's second-largest 
labour force with 586 million workers.\n", + "(India)-[:HAS]->(Free_Trade_Agreements), description: Has free trade agreements with several nations and blocs.\n", + "(India)-[:HAS]->(Stock_Exchanges), description: Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\n", + "(India)-[:HAS]->(Population), description: Nearly 65% of India's population is rural.\n", + "(India)-[:RELATES_TO]->(Rigveda)\n", + "(India)-[:MEMBER_OF]->(Wto), description: Member of the World Trade Organization since 1 January 1995.\n", + "(India)-[:FACES]->(Unemployment), description: High unemployment and rising income inequality.\n", + "(India)-[:ADOPTED]->(Economic_Liberalisation), description: Adopted broad economic liberalisation in 1991.\n", + "(India)-[:CONSIDERED_AS]->(Welfare_State), description: Often considered a welfare state.\n", + "(India)-[:NEAR]->(Sri Lanka)\n", + "(India)-[:NEAR]->(Maldives)\n", + "(India)-[:BORDERS]->(Indian Ocean)\n", + "(India)-[:BORDERS]->(Myanmar)\n", + "(India)-[:BORDERS]->(Himalayan Mountain Range)\n", + "(India)-[:BORDERS]->(China)\n", + "(India)-[:BORDERS]->(Bhutan)\n", + "(India)-[:BORDERS]->(Nepal)\n", + "(India)-[:BORDERS]->(Pakistan)\n", + "(India)-[:BORDERS]->(Bangladesh)\n", + "(India)-[:BORDERS]->(Arabian Sea)\n", + "(India)-[:BORDERS]->(Bay Of Bengal)\n", + "(India)-[:SEPARATED_BY]->(Sri Lanka)\n", + "(India)-[:DERIVED_FROM]->(Sindhu)\n", + "(India)-[:ESTIMATES]->(Food And Agriculture Organization Of The United Nations), description: Estimates India's forest cover.\n", + "(India)-[:MAKES_UP]->(Service_Sector), description: Makes up more than 50% of GDP.\n", + "(India)-[:RANKS_AS]->(World), description: Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).\n", + "(India)-[:RANKS_AS]->(Consumer_Market), description: Fourth-largest consumer market in the world.\n", + "(India)-[:RANKS_AS]->(Manufacturing), description: World's sixth-largest manufacturer, representing 
2.6% of global manufacturing output.\n", + "(India)-[:DECLARED_AS]->(Socialist_State), description: Officially declared a socialist state as per the constitution.\n", + "(India)-[:ORIGIN_OF]->(Hinduism)\n", + "(India)-[:ORIGIN_OF]->(Buddhism)\n", + "(India)-[:ORIGIN_OF]->(Jainism)\n", + "(India)-[:GOVERNED_AS]->(Indian Republic)\n", + "(India)-[:RANKS]->(Gdp), description: 136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.\n", + "(India)-[:RANKS]->(Ease_Of_Business_Index), description: 63rd on the Ease of Doing Business index.\n", + "(India)-[:RANKS]->(Global_Competitiveness_Index), description: 40th on the Global Competitiveness Index.\n", + "(India)-[:SHARES_BORDERS_WITH]->(Myanmar)\n", + "(India)-[:SPENDING]->(Social_Welfare_Spending), description: Overall social welfare spending stood at 8.6% of GDP in 2021-22.\n", + "(India)-[:HISTORICAL_EVENT]->(Mughal Empire)\n", + "(India)-[:ORIGINATES_IN]->(Ganges)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Western Ghats)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Eastern Himalayas)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Indo-Burma Hotspot)\n", + "(India)-[:HAS_ECOSYSTEM]->(Forests)\n", + "(India)-[:AVERAGE_ANNUAL_GROWTH]->(Gdp_Growth), description: 6% to 7% since the start of the 21st century.\n", + "(India)-[:FOREIGN_DIRECT_INVESTMENT]->(Fdi), description: Foreign direct investment in 2021-22 was $82 billion.\n", + "(India)-[:GROSS_DOMESTIC_SAVINGS_RATE]->(Savings_Rate), description: Gross domestic savings rate stood at 29.3% of GDP in 2022.\n", + "(India)-[:HUMAN_ORIGIN]->(Africa)\n", + "(India)-[:HISTORICAL_DEVELOPMENT]->(Indus Valley Civilisation)\n", + "(India)-[:LANGUAGE_DIFFUSION]->(Sanskrit)\n", + "(Kautalyas Arthashastra)-[:DISCUSSES]->(India), description: Discusses the need for forest administration.\n", + "(Indian Forest Act Of 1865)-[:ESTABLISHES]->(India), description: Establishes government's claims over forests.\n", + "(Crown Land (Encroachment) 
Ordinance)-[:TARGETS]->(India), description: Targets forests in Britain's Asian colonies.\n", + "(Yajnavalkya Smriti)-[:PROHIBITS]->(India), description: Prohibits the cutting of trees.\n", + "(2013 Forest Survey Of India)-[:REPORTS_ON]->(India), description: Reports on forest cover increase.\n", + "(Supreme Court Of India)-[:PART_OF_JUDICIARY]->(India)\n", + "(Indian National Congress)-[:HISTORICALLY_DOMINANT_PARTY]->(India)\n", + "(Forest Act Of 1878)-[:GIVES_CONTROL]->(India), description: Gives control over all wastelands, including forests.\n", + "(Sir Dietrich Brandis)-[:INSPECTOR_GENERAL_OF]->(India), description: Inspector General of Forests in India from 1864 to 1883.\n", + "\n" + ] + } + ], + "source": [ + "print(prepare_string(community_info[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing communities: 0%| | 0/30 [00:00 pd.DataFrame:\n", + " \"\"\"Executes a Cypher statement and returns a DataFrame\"\"\"\n", + " return driver.execute_query(\n", + " cypher, parameters_=params, result_transformer_=Result.to_df\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Calculate before \n", + "db_query(\n", + " \"\"\"\n", + "MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c)\n", + "WITH n, count(distinct c) AS chunkCount\n", + "SET n.weight = chunkCount\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## local and global search" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "index_name = \"entity_local\"\n", + "\n", + "# db_query(\n", + "# \"\"\"\n", + "# CREATE VECTOR INDEX \"\"\"\n", + "# + index_name\n", + "# + \"\"\" IF NOT EXISTS FOR (e:__Entity__) ON e.embedding\n", + "# OPTIONS {indexConfig: {\n", + "# `vector.dimensions`: 384,\n", + "# 
`vector.similarity_function`: 'cosine'\n", + "# }}\n", + "# \"\"\"\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "topChunks = 3\n", + "topCommunities = 3\n", + "topOutsideRels = 10\n", + "topInsideRels = 10\n", + "topEntities = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "lc_retrieval_query = \"\"\"\n", + "WITH collect(node) as nodes\n", + "// Entity - Text Unit Mapping\n", + "WITH\n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)<-[:HAS_ENTITY]->(c:Chunk)\n", + " WITH c, count(distinct n) as freq\n", + " RETURN c.text AS chunkText\n", + " ORDER BY freq DESC\n", + " LIMIT $topChunks\n", + "} AS text_mapping,\n", + "// Entity - Report Mapping\n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[:IN_COMMUNITY]->(c:__Community__)\n", + " WITH c, c.rank as rank, c.weight AS weight\n", + " RETURN c.summary \n", + " ORDER BY rank, weight DESC\n", + " LIMIT $topCommunities\n", + "} AS report_mapping,\n", + "// Outside Relationships \n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[r:RELATED]-(m) \n", + " WHERE NOT m IN nodes\n", + " RETURN r.description AS descriptionText\n", + " ORDER BY r.rank, r.weight DESC \n", + " LIMIT $topOutsideRels\n", + "} as outsideRels,\n", + "// Inside Relationships \n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[r:RELATED]-(m) \n", + " WHERE m IN nodes\n", + " RETURN r.description AS descriptionText\n", + " ORDER BY r.rank, r.weight DESC \n", + " LIMIT $topInsideRels\n", + "} as insideRels,\n", + "// Entities description\n", + "collect {\n", + " UNWIND nodes as n\n", + " RETURN n.description AS descriptionText\n", + "} as entities\n", + "// We don't have covariates or claims here\n", + "RETURN {Chunks: text_mapping, Reports: report_mapping, \n", + " Relationships: outsideRels + insideRels, \n", + " Entities: entities} AS text, 1.0 AS score, {} AS 
metadata\n", + "\"\"\"\n", + "\n", + "embeddings, dimension = load_embedding_model(\"sentence_transformer\")\n", + "lc_vector = Neo4jVector.from_existing_index(\n", + " embeddings,\n", + " url=os.environ[\"NEO4J_URI\"],\n", + " username=os.environ[\"NEO4J_USERNAME\"],\n", + " password=os.environ[\"NEO4J_PASSWORD\"],\n", + " index_name=index_name,\n", + " retrieval_query=lc_retrieval_query,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "docs = lc_vector.similarity_search_with_score(\n", + " \"Major cities of India\",\n", + " k=topEntities,\n", + " params={\n", + " \"topChunks\": topChunks,\n", + " \"topCommunities\": topCommunities,\n", + " \"topOutsideRels\": topOutsideRels,\n", + " \"topInsideRels\": topInsideRels,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Entities:\n", + "- None\n", + "- None\n", + "- None\n", + "- One of the three biodiversity hotspots in India.\n", + "- Officially the Republic of India, located in South Asia.\n", + "- Geographical realm that includes India.\n", + "- One of the three biodiversity hotspots in India.\n", + "- Defines the northern frontiers of India.\n", + "- One of the three biodiversity hotspots in India.\n", + "- Commonly used name for the Republic of India.\n", + "Reports:\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. 
India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. 
The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. 
The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. 
India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "Chunks:\n", + "- India is one of the most biodiverse regions and is home to a large variety of wildlife. It is one of the 17 megadiverse countries and includes three of the worlds 36 biodiversity hotspots – the Western Ghats, the Eastern Himalayas, and the Indo-Burma hotspot. About 24.6% of the total land area is covered by forests. It has various ecosystems ranging from the high altitude Himalayas, tropical evergreen forests along the Western Ghats, desert in the north-west, coastal plains and mangroves along the peninsular region. India lies within the Indomalayan realm and is home to about 7.6% of mammal, 14.7% of amphibian, 6% of bird, 6.2% of reptilian, and 6.2% of flowering plant species. Human encroachment, deforestation and poaching are significant challenges that threaten the existence of certain fauna and flora. Government of India established a system of national parks and\n", + "- threaten the existence of certain fauna and flora. 
Government of India established a system of national parks and protected areas in 1935, which have been subsequently expanded to nearly 1022 protected areas by 2023. India has enacted the Wildlife Protection Act of 1972 and special projects such as Project Tiger, Project Elephant and Project Dolphin for protection of critical species. == Fauna == India has an estimated 92,873 species of fauna, roughly about 7.5% of the species available worldwide. Insects form the major category with 63423 recorded species. India is home to 423 mammals, 1233 birds, 526 reptiles, 342 amphibians, 3022 fish apart from other species which form 7.6% of mammal, 14.7% of amphibian, 6% of bird, 6.2% of reptilian species worldwide. Many Indian species are descendants of species originating in Gondwana, of which India originally was a part. Peninsular Indias subsequent movement towards\n", + "- Gondwana, of which India originally was a part. Peninsular Indias subsequent movement towards, and collision with, the Laurasian landmass set off a mass exchange of species. However, volcanism in the Deccan Traps and climatic change 20 million years ago caused the extinction of many endemic Indian forms. Soon thereafter, mammals entered India from Asia through two zoogeographical passes on either side of the emerging Himalayas. As a result, among Indian species, only 12.6% of mammals and 4.5% of birds are endemic, contrasting with 45.8% of reptiles and 55.8% of amphibians India is home to several well-known large animals, including the Indian elephant, Indian rhinoceros, and Gaur. India is the only country where the big cats tiger and lion exist in the wild. 
Members of the cat family include Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded\n", + "Relationships:\n", + "\n" + ] + } + ], + "source": [ + "print(docs[0][0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "model = \"openai-gpt-4o\"\n", + "llm, model_name = get_llm(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "MAP_SYSTEM_PROMPT = \"\"\"\n", + "---Role---\n", + "\n", + "You are a helpful assistant responding to questions about data in the tables provided.\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.\n", + "\n", + "You should use the data provided in the data tables below as the primary context for generating the response.\n", + "If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up.\n", + "\n", + "Each key point in the response should have the following element:\n", + "- Description: A comprehensive description of the point.\n", + "- Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. 
An 'I don't know' type of response should have a score of 0.\n", + "\n", + "The response should be JSON formatted as follows:\n", + "{{\n", + " \"points\": [\n", + " {{\"description\": \"Description of point 1 [Data: Reports (report ids)]\", \"score\": score_value}},\n", + " {{\"description\": \"Description of point 2 [Data: Reports (report ids)]\", \"score\": score_value}}\n", + " ]\n", + "}}\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "Points supported by data should list the relevant reports as references as follows:\n", + "\"This is an example sentence supported by data references [Data: Reports (report ids)]\"\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Data tables---\n", + "\n", + "{context_data}\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.\n", + "\n", + "You should use the data provided in the data tables below as the primary context for generating the response.\n", + "If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. 
Do not make anything up.\n", + "\n", + "Each key point in the response should have the following element:\n", + "- Description: A comprehensive description of the point.\n", + "- Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "Points supported by data should list the relevant reports as references as follows:\n", + "\"This is an example sentence supported by data references [Data: Reports (report ids)]\"\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "The response should be JSON formatted as follows:\n", + "{{\n", + " \"points\": [\n", + " {{\"description\": \"Description of point 1 [Data: Reports (report ids)]\", \"score\": score_value}},\n", + " {{\"description\": \"Description of point 2 [Data: Reports (report ids)]\", \"score\": score_value}}\n", + " ]\n", + "}}\n", + "\"\"\"\n", + "\n", + "map_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " MAP_SYSTEM_PROMPT,\n", + " ),\n", + " (\n", + " \"human\",\n", + " \"{question}\",\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "map_chain = map_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "REDUCE_SYSTEM_PROMPT = \"\"\"\n", + "---Role---\n", + "\n", + "You are a helpful assistant responding to questions about a dataset by synthesizing perspectives from multiple analysts.\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset.\n", + "\n", + "Note that the analysts' reports provided below are ranked in the **descending order of importance**.\n", + "\n", + "If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. 
Do not make anything up.\n", + "\n", + "The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format.\n", + "\n", + "Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process.\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Target response length and format---\n", + "\n", + "{response_type}\n", + "\n", + "\n", + "---Analyst Reports---\n", + "\n", + "{report_data}\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset.\n", + "\n", + "Note that the analysts' reports provided below are ranked in the **descending order of importance**.\n", + "\n", + "If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. Do not make anything up.\n", + "\n", + "The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process.\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Target response length and format---\n", + "\n", + "{response_type}\n", + "\n", + "Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.\n", + "\"\"\"\n", + "\n", + "reduce_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " REDUCE_SYSTEM_PROMPT,\n", + " ),\n", + " (\n", + " \"human\",\n", + " \"{question}\",\n", + " ),\n", + " ]\n", + ")\n", + "reduce_chain = reduce_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "graph = Neo4jGraph(\n", + " url=os.environ[\"NEO4J_URI\"],\n", + " username=os.environ[\"NEO4J_USERNAME\"],\n", + " password=os.environ[\"NEO4J_PASSWORD\"],\n", + " refresh_schema=False,\n", + ")\n", + "\n", + "response_type: str = \"multiple paragraphs\"\n", + "\n", + "\n", + "def global_retriever(query: str, level: int, response_type: str = response_type) -> str:\n", + " community_data = graph.query(\n", + " \"\"\"\n", + " MATCH (c:__Community__)\n", + " WHERE c.level = $level\n", + " RETURN c.full_content AS output\n", + " \"\"\",\n", + " params={\"level\": level},\n", + " )\n", + " intermediate_results = []\n", + " for community in tqdm(community_data, desc=\"Processing communities\"):\n", + " intermediate_response = map_chain.invoke(\n", + " {\"question\": query, \"context_data\": community[\"output\"]}\n", + " )\n", + " intermediate_results.append(intermediate_response)\n", + " final_response = reduce_chain.invoke(\n", + " {\n", + " \"report_data\": intermediate_results,\n", + " \"question\": query,\n", + " \"response_type\": response_type,\n", + " }\n", + " )\n", + " 
return final_response" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownPropertyKeyWarning} {category: UNRECOGNIZED} {title: The provided property key is not in the database} {description: One of the property names in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing property name is: full_content)} {position: line: 4, column: 14, offset: 69} for query: '\\n MATCH (c:__Community__)\\n WHERE c.level = $level\\n RETURN c.full_content AS output\\n '\n", + "Processing communities: 0it [00:00, ?it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I'm sorry, but the provided reports do not contain sufficient information to provide an answer to your question about the major cities of India. 
If you have any other questions or need information on a different topic, please let me know!\n" + ] + } + ], + "source": [ + "print(global_retriever(\"Major cities of India?\", 2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chatbotenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From e078e214b4ffe675274ebcc040bbfcb8e4f812d8 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:29:34 +0000 Subject: [PATCH 003/292] connection _check --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 10bb52738..4daffbc73 100644 --- a/backend/score.py +++ b/backend/score.py @@ -360,7 +360,7 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(connection_check, graph) + result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) return create_api_response('Success',message=result) From 7c66bf23fc8ce6a9978645273055bdfaad827460 Mon Sep 17 00:00:00 2001 From: destiny966113 <90891243+destiny966113@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:49:04 +0800 Subject: [PATCH 004/292] Fix typo: 
correct 'josn_obj' to 'json_obj' (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages 
on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan 
check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and 
instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the 
alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and 
lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on 
chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing 
notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * format fixes * Close connect when graph object is not none * Call garbage collector to release the menory * Change error message * Added driver config as user_agent * Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473) * Web URLs are user input (#475) * web url support backend * added the tabs for input source * user agent added for Neo4jGraph connection * Tab view for sources * extract handling for web ur's * initial input handling * chunk creation before processing * code structure * format fixes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed the regex for web and cancel button naming * changed the schema dropdown type * readme updates * PROD version fix * changed the alert message for gcs * Delete unconnected entities from DB (#482) * 457 add schema before generate graph (#478) * schema setting from generate graph * changes * changes * badge changes * bug fix * Fulltext index and Update similarity graph (#479) * added full_text index * added one common function for post_processing * post processing api * added tasks param * modifed logging * post processing changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Graph and vector search (#485) * Modified the retrival query * added the chatmode toggle component * Modified to vector search * Moved the templates to constants * added the icons * added chat modes * code structure changes * Intergrated the API changges * Modified retrieval queries,refactored code * API integration changes * added the score * order change * wording change * modified constants * added graph+vector * added the tooltips * Modified query * 
removed the graph mode * tooltip camel Case * added the icon and extern link for web source in the info modal * added the youtube link in the source used tab * format fixes * added the hoverable link --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Update InfoModal.tsx * removed hover from chunks * removed page number * removed page number * removed pag… * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * staging changes to main * Update README.md (#539) * docs: update README.md (#546) specifiy -> specify * Fix typo: correct 'josn_obj' to 'json_obj' --------- Co-authored-by: Prakriti Solankey 
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Andy --- backend/score.py | 69 +++++++++---------- .../src/components/Graph/GraphViewModal.tsx | 2 +- frontend/src/components/Graph/LegendsChip.tsx | 2 +- frontend/src/components/Layout/PageLayout.tsx | 2 +- .../Popups/DeletePopUp/DeletePopUp.tsx | 2 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- .../Popups/GraphEnhancementDialog/index.tsx | 2 +- .../Popups/LargeFilePopUp/LargeFilesAlert.tsx | 2 +- frontend/src/components/QuickStarter.tsx | 5 +- 9 files changed, 45 insertions(+), 43 deletions(-) diff --git a/backend/score.py b/backend/score.py index 6f5113db6..10bb52738 100644 --- a/backend/score.py +++ b/backend/score.py @@ -105,8 +105,8 @@ async def create_source_knowledge_graph_url( return create_api_response('Failed',message='source_type is other than accepted source') message = f"Source Node created successfully for source type: {source_type} and source: {source}" - josn_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + 
logger.log_struct(json_obj) return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name) except Exception as e: error_message = str(e) @@ -208,9 +208,9 @@ async def extract_knowledge_graph_from_file( else: logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') delete_uploaded_local_file(merged_file_path,file_name) - josn_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) - logging.exception(f'File Failed in extraction: {josn_obj}') + json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) + logging.exception(f'File Failed in extraction: {json_obj}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) finally: gc.collect() @@ -225,8 +225,8 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) - josn_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response("Success",data=result) except Exception as e: job_status = "Failed" @@ -243,19 +243,18 @@ async def post_processing(uri=Form(), userName=Form(), 
password=Form(), database if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) - josn_obj = {'api_name': 'post_processing/materialize_text_chunk_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) logging.info(f'Updated KNN Graph') - if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: - await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") - await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") - josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + if "create_fulltext_index" in tasks: + await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database) + json_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) logging.info(f'Full Text index created') if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) - josn_obj = {'api_name': 'post_processing/materialize_entity_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) logging.info(f'Entity Embeddings created') return create_api_response('Success', 
message='All tasks completed successfully') @@ -284,8 +283,8 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - josn_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -301,8 +300,8 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids= try: logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) - josn_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -329,8 +328,8 @@ async def graph_query( password=password, document_names=document_names ) - josn_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success', data=result) except Exception as e: job_status = "Failed" @@ -361,10 +360,10 @@ async def 
clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) - josn_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) - return create_api_response('Success',data=result) + result = await asyncio.to_thread(connection_check, graph) + json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) + return create_api_response('Success',message=result) except Exception as e: job_status = "Failed" message="Connection failed to connect Neo4j database" @@ -379,8 +378,8 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber try: graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) - josn_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: @@ -401,8 +400,8 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(get_labels_and_relationtypes, graph) logging.info(f'Schema result from DB: {result}') - josn_obj = {'api_name':'schema','db_url':uri, 
'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success', data=result) except Exception as e: message="Unable to get the labels and relationtypes from neo4j database" @@ -468,10 +467,10 @@ async def delete_document_and_entities(uri=Form(), graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) - # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 - message = f"Deleted {files_list_size} documents with entities from database" - josn_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 + message = f"Deleted {files_list_size} documents with {entities_count} entities from database" + json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',message=message) except Exception as e: job_status = "Failed" @@ -627,4 +626,4 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() if __name__ == "__main__": - uvicorn.run(app) \ No newline at end of file + uvicorn.run(app) diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 9c0d5d190..5479509a1 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -356,4 +356,4 @@ const 
GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; +export default GraphViewModal; \ No newline at end of file diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 7e121dc67..7d2de8b74 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -5,7 +5,7 @@ import Legend from '../UI/Legend'; export const LegendsChip: React.FunctionComponent = ({ scheme, title, nodes }) => { const chunkcount = useMemo( () => [...new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id))].length, - [nodes] + [] ); return ; diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 8aca58109..1a2f755b4 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -175,4 +175,4 @@ export default function PageLayoutNew({ /> ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx index 1b1bd58c8..eb1522868 100644 --- a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx +++ b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx @@ -54,4 +54,4 @@ export default function DeletePopUp({ ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 2dc8a13ff..9cdf239dc 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -279,4 +279,4 @@ export default function DeletePopUpForOrphanNodes({ ); -} +} \ No newline at end of file diff --git 
a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index 84577c53c..5e2aaf713 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -126,4 +126,4 @@ export default function GraphEnhancementDialog({ ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index 26c9cfcda..b2eb01a52 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -100,4 +100,4 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke ); }; -export default LargeFilesAlert; +export default LargeFilesAlert; \ No newline at end of file diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index db2cba7e7..bfbbaf55b 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -11,6 +11,7 @@ const QuickStarter: React.FunctionComponent = () => { const themeUtils = React.useContext(ThemeWrapperContext); const [themeMode, setThemeMode] = useState(themeUtils.colorMode); const [showSettingsModal, setshowSettingsModal] = useState(false); + const [showOrphanNodeDeletionDialog, setshowOrphanNodeDeletionDialog] = useState(false); const toggleColorMode = () => { setThemeMode((prevThemeMode) => { @@ -24,7 +25,6 @@ const QuickStarter: React.FunctionComponent = () => { const closeSettingModal = () => { setshowSettingsModal(false); }; - return ( @@ -35,6 +35,9 @@ const QuickStarter: React.FunctionComponent = () => { openSettingsDialog={openSettingsModal} isSettingPanelExpanded={showSettingsModal} closeSettingModal={closeSettingModal} + closeOrphanNodeDeletionModal={closeOrphanNodeDeletionModal} + 
showOrphanNodeDeletionModal={showOrphanNodeDeletionDialog} + openOrphanNodeDeletionModal={openOrphanNodeDeletionModal} /> From 477fda0bcfe5ed56edc7cd4e738ce63f0d74c691 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:57:15 +0000 Subject: [PATCH 005/292] lint fixes --- frontend/src/components/QuickStarter.tsx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index bfbbaf55b..29832fee1 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -11,7 +11,6 @@ const QuickStarter: React.FunctionComponent = () => { const themeUtils = React.useContext(ThemeWrapperContext); const [themeMode, setThemeMode] = useState(themeUtils.colorMode); const [showSettingsModal, setshowSettingsModal] = useState(false); - const [showOrphanNodeDeletionDialog, setshowOrphanNodeDeletionDialog] = useState(false); const toggleColorMode = () => { setThemeMode((prevThemeMode) => { @@ -25,6 +24,7 @@ const QuickStarter: React.FunctionComponent = () => { const closeSettingModal = () => { setshowSettingsModal(false); }; + return ( @@ -35,9 +35,6 @@ const QuickStarter: React.FunctionComponent = () => { openSettingsDialog={openSettingsModal} isSettingPanelExpanded={showSettingsModal} closeSettingModal={closeSettingModal} - closeOrphanNodeDeletionModal={closeOrphanNodeDeletionModal} - showOrphanNodeDeletionModal={showOrphanNodeDeletionDialog} - openOrphanNodeDeletionModal={openOrphanNodeDeletionModal} /> @@ -45,4 +42,4 @@ const QuickStarter: React.FunctionComponent = () => { ); }; -export default QuickStarter; +export default QuickStarter; \ No newline at end of file From bacbcfc202c1718a72306c37bff71d19d26be701 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:29:34 +0000 Subject: [PATCH 006/292] 
connection _check --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 10bb52738..4daffbc73 100644 --- a/backend/score.py +++ b/backend/score.py @@ -360,7 +360,7 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(connection_check, graph) + result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) return create_api_response('Success',message=result) From 8e14771bf9d61effd65d85909d7c4b602d0d68e2 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:59:04 +0000 Subject: [PATCH 007/292] removed tqdm --- backend/src/communities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index e2f09cba6..b795ae4ed 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -198,7 +198,7 @@ def create_community_summaries(graph, model): with ThreadPoolExecutor() as executor: futures = {executor.submit(process_community, community, community_chain): community for community in community_info_list} - for future in tqdm(as_completed(futures), total=len(futures), desc="Processing communities"): + for future in as_completed(futures): try: summaries.append(future.result()) except Exception as e: From 2fc3e136df024bac3a13b90705c952983c1d2046 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:06:42 +0530 Subject: [PATCH 008/292] Chatbot changes (#700) * added 
Hybrid search from graph * modified mode params --- backend/requirements.txt | 2 +- backend/score.py | 10 ++++++---- backend/src/QA_integration_new.py | 26 +++++++++++++------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index ab42a749d..0c903a325 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -74,7 +74,7 @@ langchain-aws==0.1.9 langchain-anthropic==0.1.19 langchain-fireworks==0.1.4 langchain-google-genai==1.0.7 -langchain-community==0.2.7 +langchain-community==0.2.12 langchain-core==0.2.19 langchain-experimental==0.0.62 langchain-google-vertexai==1.0.6 diff --git a/backend/score.py b/backend/score.py index 3e6512a38..0b63e57d4 100644 --- a/backend/score.py +++ b/backend/score.py @@ -246,10 +246,12 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) logging.info(f'Updated KNN Graph') - if "create_fulltext_index" in tasks: - await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database) - json_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + + if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: + await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") + # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") + josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(josn_obj) logging.info(f'Full Text index created') if 
os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 1c0bc254c..5a2baa599 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -41,26 +41,26 @@ def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - if mode == "hybrid": - # neo_db = Neo4jVector.from_existing_graph( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph, - # search_type="hybrid", - # node_label="Chunk", - # embedding_node_property="embedding", - # text_node_properties=["text"] - # # keyword_index_name=keyword_index - # ) - neo_db = Neo4jVector.from_existing_index( + if mode == "hybrid" or mode == "hybrid+graph": + neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, retrieval_query=retrieval_query, graph=graph, search_type="hybrid", + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"], keyword_index_name=keyword_index ) + # neo_db = Neo4jVector.from_existing_index( + # embedding=EMBEDDING_FUNCTION, + # index_name=index_name, + # retrieval_query=retrieval_query, + # graph=graph, + # search_type="hybrid", + # keyword_index_name=keyword_index + # ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'") else: neo_db = Neo4jVector.from_existing_index( From 279d820dfb8b156797f5798e1791cef6483da75e Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:54:24 +0000 Subject: [PATCH 009/292] fixed issue delete entities return count --- backend/score.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/score.py b/backend/score.py index 0b63e57d4..00650b56e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -469,8 +469,8 @@ async def delete_document_and_entities(uri=Form(), graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) - entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 - message = f"Deleted {files_list_size} documents with {entities_count} entities from database" + # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 + message = f"Deleted {files_list_size} documents with entities from database" json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) return create_api_response('Success',message=message) From 30f92cd5114088a3be5d6e85f4a11d2797b2db78 Mon Sep 17 00:00:00 2001 From: Pravesh1988 Date: Wed, 21 Aug 2024 10:00:08 +0000 Subject: [PATCH 010/292] removed specified version due to dependency clashes between versions --- backend/requirements.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 0c903a325..46c57aea5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -69,18 +69,18 @@ jsonpath-python==1.0.6 jsonpointer==2.4 json-repair==0.25.2 kiwisolver==1.4.5 -langchain==0.2.8 -langchain-aws==0.1.9 -langchain-anthropic==0.1.19 -langchain-fireworks==0.1.4 -langchain-google-genai==1.0.7 -langchain-community==0.2.12 -langchain-core==0.2.19 -langchain-experimental==0.0.62 -langchain-google-vertexai==1.0.6 -langchain-groq==0.1.6 -langchain-openai==0.1.14 -langchain-text-splitters==0.2.2 
+langchain +langchain-aws +langchain-anthropic +langchain-fireworks +langchain-google-genai +langchain-community +langchain-core +langchain-experimental +langchain-google-vertexai +langchain-groq +langchain-openai +langchain-text-splitters langdetect==1.0.9 langsmith==0.1.83 layoutparser==0.3.4 From edcff3bb931c865392c9931a4b07b477dbc702f6 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:34:20 +0000 Subject: [PATCH 011/292] updated script"integration test cases" --- backend/test_integrationqa.py | 368 +++++++++++++--------------------- 1 file changed, 135 insertions(+), 233 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index cd662cbdc..20f3effb0 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -1,270 +1,172 @@ +import os +import shutil +import logging +import pandas as pd +from datetime import datetime as dt +from dotenv import load_dotenv + from score import * from src.main import * -import logging from src.QA_integration_new import QA_RAG from langserve import add_routes -import asyncio -import os -from dotenv import load_dotenv -import pandas as pd -from datetime import datetime as dt -uri = '' -userName = '' -password = '' -# model = 'openai-gpt-3.5' -database = 'neo4j' +# Load environment variables if needed +load_dotenv() + +# Constants +URI = '' +USERNAME = '' +PASSWORD = '' +DATABASE = 'neo4j' CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") -graph = create_graph_database_connection(uri, userName, password, database) - - -def test_graph_from_file_local_file(model_name): - model = model_name - file_name = 'About Amazon.pdf' - # shutil.copyfile('data/Bank of America Q23.pdf', 'backend/src/merged_files/Bank of America Q23.pdf') - shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', - 
'/workspaces/llm-graph-builder/backend/merged_files/About Amazon.pdf') - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.file_type = 'pdf' - obj_source_node.file_size = '1087' - obj_source_node.file_source = 'local file' - obj_source_node.model = model - obj_source_node.created_at = datetime.now() - graphDb_data_Access = graphDBdataAccess(graph) - graphDb_data_Access.create_source_node(obj_source_node) - merged_file_path = os.path.join(MERGED_DIR, file_name) - print(merged_file_path) - - local_file_result = extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, '', '') - # final_list.append(local_file_result) - print(local_file_result) +# Initialize database connection +graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + +def create_source_node_local(graph, model, file_name): + """Creates a source node for a local file.""" + source_node = sourceNode() + source_node.file_name = file_name + source_node.file_type = 'pdf' + source_node.file_size = '1087' + source_node.file_source = 'local file' + source_node.model = model + source_node.created_at = dt.now() + graphDB_data_Access = graphDBdataAccess(graph) + graphDB_data_Access.create_source_node(source_node) + return source_node + +def test_graph_from_file_local(model_name): + """Test graph creation from a local file.""" + file_name = 'About Amazon.pdf' + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', + os.path.join(MERGED_DIR, file_name)) + + create_source_node_local(graph, model_name, file_name) + merged_file_path = os.path.join(MERGED_DIR, file_name) + + local_file_result = extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '' + ) + logging.info("Local file processing complete") + print(local_file_result) - logging.info("Info: ") - try: - assert local_file_result['status'] == 'Completed' and 
local_file_result['nodeCount'] > 0 and local_file_result[ - 'relationshipCount'] > 0 - return local_file_result - print("Success") - except AssertionError as e: - print("Fail: ", e) - return local_file_result - - -def test_graph_from_file_local_file_failed(model_name): - model = model_name - file_name = 'Not_exist.pdf' - try: - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.file_type = 'pdf' - obj_source_node.file_size = '0' - obj_source_node.file_source = 'local file' - obj_source_node.model = model - obj_source_node.created_at = datetime.now() - graphDb_data_Access = graphDBdataAccess(graph) - graphDb_data_Access.create_source_node(obj_source_node) - - local_file_result = extract_graph_from_file_local_file(graph, model, file_name, merged_file_path, '', '') + try: + assert local_file_result['status'] == 'Completed' + assert local_file_result['nodeCount'] > 0 + assert local_file_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) - print(local_file_result) - except AssertionError as e: - print('Failed due to file does not exist means not uploaded or accidentaly deleteled from server') - print("Failed: Error from extract function ", e) + return local_file_result -# Check for Wikipedia file to be test -def test_graph_from_Wikipedia(model_name): - model = model_name +def test_graph_from_wikipedia(model_name): + """Test graph creation from a Wikipedia page.""" wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' source_type = 'Wikipedia' file_name = "Ram_Mandir" - create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type) - wikiresult = extract_graph_from_file_Wikipedia(uri, userName, password, database, model, file_name, 1, 'en', '', '') - logging.info("Info: Wikipedia test done") - print(wikiresult) - + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) + + wiki_result = extract_graph_from_file_Wikipedia( + URI, USERNAME, 
PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '' + ) + logging.info("Wikipedia test done") + print(wiki_result) + try: - assert wikiresult['status'] == 'Completed' and wikiresult['nodeCount'] > 0 and wikiresult['relationshipCount'] > 0 - return wikiresult + assert wiki_result['status'] == 'Completed' + assert wiki_result['nodeCount'] > 0 + assert wiki_result['relationshipCount'] > 0 print("Success") except AssertionError as e: - print("Fail ", e) - return wikiresult + print("Fail: ", e) + return wiki_result -def test_graph_from_Wikipedia_failed(): - wiki_query = 'Test QA 123456' - source_type = 'Wikipedia' - try: - logging.info("Created source node for wikipedia") - create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type) - except AssertionError as e: - print("Fail ", e) - -# Check for Youtube_video to be Success def test_graph_from_youtube_video(model_name): - model = model_name + """Test graph creation from a YouTube video.""" source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' source_type = 'youtube' - create_source_node_graph_url_youtube(graph, model, source_url, source_type) - youtuberesult = extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, '', '') + create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) + youtube_result = extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '' + ) + logging.info("YouTube Video test done") + print(youtube_result) - logging.info("Info: Youtube Video test done") - print(youtuberesult) try: - assert youtuberesult['status'] == 'Completed' and youtuberesult['nodeCount'] > 1 and youtuberesult[ - 'relationshipCount'] > 1 - return youtuberesult + assert youtube_result['status'] == 'Completed' + assert youtube_result['nodeCount'] > 1 + assert youtube_result['relationshipCount'] > 1 print("Success") except AssertionError as e: - print("Failed ", e) - return youtuberesult - -# Check for 
Youtube_video to be Failed - -def test_graph_from_youtube_video_failed(): - url = 'https://www.youtube.com/watch?v=U9mJuUkhUzk' - source_type = 'youtube' - - create_source_node_graph_url_youtube(graph, model, url, source_type) - youtuberesult = extract_graph_from_file_youtube(graph, model, url, ',', ',') - # print(result) - print(youtuberesult) - try: - assert youtuberesult['status'] == 'Completed' - return youtuberesult - except AssertionError as e: - print("Failed ", e) - - -# Check for the GCS file to be uploaded, process and completed - -def test_graph_from_file_test_gcs(): - bucket_name = 'test' - folder_name = 'test' - source_type = 'gcs test bucket' - file_name = 'Neuralink brain chip patient playing chess.pdf' - create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type) - gcsresult = extract_graph_from_file_gcs(graph, model, bucket_name, folder_name, file_name, '', '') + print("Failed: ", e) - logging.info("Info") - print(gcsresult) - - try: - assert gcsresult['status'] == 'Completed' and gcsresult['nodeCount'] > 10 and gcsresult['relationshipCount'] > 5 - print("Success") - except AssertionError as e: - print("Failed ", e) - - -def test_graph_from_file_test_gcs_failed(): - bucket_name = 'llm_graph_test' - folder_name = 'test' - source_type = 'gcs bucket' - # file_name = 'Neuralink brain chip patient playing chess.pdf' - try: - create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type) - print("GCS: Create source node failed due to bucket not exist") - except AssertionError as e: - print("Failed ", e) - - -def test_graph_from_file_test_s3_failed(): - source_url = 's3://development-llm-test/' - try: - create_source_node_graph_url_s3(graph, model, source_url, 'test123', 'pwd123') - # assert result['status'] == 'Failed' - # print("S3 created source node failed die to wrong access key id and secret") - except AssertionError as e: - print("Failed ", e) - - -# Check the Functionality of Chatbot QnA for mode 
'graph+vector' -def test_chatbot_QnA(model_name): - model = model_name - QA_n_RAG = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'graph+vector') + return youtube_result +def test_chatbot_qna(model_name, mode='graph+vector'): + """Test chatbot QnA functionality for different modes.""" + QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) print(QA_n_RAG) print(len(QA_n_RAG['message'])) - try: - assert len(QA_n_RAG['message']) > 20 - return QA_n_RAG - print("Success") - except AssertionError as e: - print("Failed ", e) - return QA_n_RAG - -# Check the Functionality of Chatbot QnA for mode 'vector' -def test_chatbot_QnA_vector(model_name): - model = model_name - QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector') - - - print(QA_n_RAG_vector) - print(len(QA_n_RAG_vector['message'])) try: - assert len(QA_n_RAG_vector['message']) > 20 - return QA_n_RAG_vector - print("Success") - except AssertionError as e: - print("Failed ", e) - return QA_n_RAG_vector - -# Check the Functionality of Chatbot QnA for mode 'hybrid' - -def test_chatbot_QnA_hybrid(model_name): - model = model_name - QA_n_RAG_hybrid = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'hybrid') - - - print(QA_n_RAG_hybrid) - print(len(QA_n_RAG_hybrid['message'])) - try: - assert len(QA_n_RAG_hybrid['message']) > 20 - return QA_n_RAG_hybrid + assert len(QA_n_RAG['message']) > 20 print("Success") except AssertionError as e: - print("Failed ", e) - return QA_n_RAG_hybrid - - + print("Failed: ", e) + + return QA_n_RAG + +def compare_graph_results(results): + """ + Compare graph results across different models. + Add custom logic here to compare graph data, nodes, and relationships. 
+ """ + # Placeholder logic for comparison + print("Comparing results...") + for i in range(len(results) - 1): + result_a = results[i] + result_b = results[i + 1] + if result_a == result_b: + print(f"Result {i} is identical to result {i+1}") + else: + print(f"Result {i} differs from result {i+1}") + +def run_tests(): + final_list = [] + error_list = [] + models = [ + 'openai-gpt-3.5', 'openai-gpt-4o', 'openai-gpt-4o-mini', 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', 'anthropic_claude_3_5_sonnet', 'fireworks_v3p1_405b', + 'fireworks_llama_v3_70b', 'ollama_llama3', 'bedrock_claude_3_5_sonnet' + ] + + for model_name in models: + try: + final_list.append(test_graph_from_file_local(model_name)) + final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_chatbot_qna(model_name)) + final_list.append(test_chatbot_qna(model_name, mode='vector')) + final_list.append(test_chatbot_qna(model_name, mode='hybrid')) + except Exception as e: + error_list.append((model_name, str(e))) + #Compare and log diffrences in graph results + compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + + # Save final results to CSV + df = pd.DataFrame(final_list) + df['execution_date'] = dt.today().strftime('%Y-%m-%d') + df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + + # Save error details to CSV + df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) if __name__ == "__main__": - final_list = [] - for model_name in ['openai-gpt-3.5','azure_ai_gpt_35','azure_ai_gpt_4o','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet']: - - # local file - response = test_graph_from_file_local_file(model_name) - final_list.append(response) - - # # Wikipedia Test - 
response = test_graph_from_Wikipedia(model_name) - final_list.append(response) - - # # Youtube Test - response= test_graph_from_youtube_video(model_name) - final_list.append(response) - # # print(final_list) - - # # test_graph_from_file_test_gcs(model_name) # GCS Test - - # #chatbot 'graph+vector' - response = test_chatbot_QnA(model_name) - final_list.append(response) - - # #chatbot 'vector' - response = test_chatbot_QnA_vector(model_name) - final_list.append(response) - - # #chatbot 'hybrid' - response = test_chatbot_QnA_hybrid(model_name) - final_list.append(response) - - # test_graph_from_file_test_s3_failed() # S3 Failed Test Case - df = pd.DataFrame(final_list) - df['execution_date']= datetime.today().strftime('%Y-%m-%d') - df.to_csv(f"Integration_TestResult_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) \ No newline at end of file + run_tests() \ No newline at end of file From 71ed29abb32f83095d0e2cbbe1c22d254ed9bc5b Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:54:49 +0000 Subject: [PATCH 012/292] decreased the delay for pollintg API --- frontend/src/services/PollingAPI.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/services/PollingAPI.ts b/frontend/src/services/PollingAPI.ts index 08e6a11bb..baa2e6080 100644 --- a/frontend/src/services/PollingAPI.ts +++ b/frontend/src/services/PollingAPI.ts @@ -15,7 +15,7 @@ export default async function subscribe( const MAX_POLLING_ATTEMPTS = 10; let pollingAttempts = 0; - let delay = 2000; + let delay = 1000; while (pollingAttempts < MAX_POLLING_ATTEMPTS) { let currentdelay = delay; From 1af58770727f65054edc0477aebed91cf142206e Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:27:54 +0530 Subject: [PATCH 013/292] Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes 
* node-highlight * Build fixex * slash docker change * deactivating previous node/relationshsips * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size --- frontend/src/App.css | 11 +- .../src/components/ChatBot/ChatInfoModal.tsx | 13 +- .../src/components/ChatBot/Info/InfoModal.tsx | 13 +- .../src/components/Graph/GraphViewModal.tsx | 294 +++++++++++++++--- frontend/src/components/Graph/LegendsChip.tsx | 16 +- .../Deduplication/index.tsx | 6 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- frontend/src/components/UI/Legend.tsx | 14 +- frontend/src/components/UI/ShowAll.tsx | 38 +++ frontend/src/types.ts | 39 +-- frontend/src/utils/Constants.ts | 4 + frontend/src/utils/Utils.ts | 10 +- frontend/yarn.lock | 2 +- 13 files changed, 363 insertions(+), 99 deletions(-) create mode 100644 frontend/src/components/UI/ShowAll.tsx diff --git a/frontend/src/App.css b/frontend/src/App.css index ff084ffae..fe285c972 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -233,10 +233,8 @@ letter-spacing: 0; line-height: 1.25rem; width: max-content; - height: 30px; text-overflow: ellipsis; white-space: nowrap; - overflow: hidden; } .ndl-widget-content>div { @@ -365,4 +363,13 @@ .widthunset{ width: initial !important; height: initial !important; +} + +.text-input-container { + transition: width 1.5s ease; + /* width: 100dvh; */ +} + +.text-input-container.search-initiated { + width: 60dvh; } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 7bd837299..42dad1893 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -18,13 +18,20 @@ import wikipedialogo from '../../assets/images/wikipedia.svg'; import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; 
import s3logo from '../../assets/images/s3logo.png'; -import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../types'; +import { + Chunk, + Entity, + ExtendedNode, + ExtendedRelationship, + GroupedEntity, + UserCredentials, + chatInfoMessage, +} from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; import HoverableLink from '../UI/HoverableLink'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import type { Relationship } from '@neo4j-nvl/base'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ReactMarkdown from 'react-markdown'; import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; @@ -51,7 +58,7 @@ const ChatInfoModal: React.FC = ({ const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); + const [relationships, setRelationships] = useState([]); const [chunks, setChunks] = useState([]); const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx index 0dd325731..cf1bbca47 100644 --- a/frontend/src/components/ChatBot/Info/InfoModal.tsx +++ b/frontend/src/components/ChatBot/Info/InfoModal.tsx @@ -6,13 +6,20 @@ import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; import youtubelogo from '../../../assets/images/youtube.png'; import gcslogo from '../../../assets/images/gcs.webp'; import s3logo from '../../../assets/images/s3logo.png'; -import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../../types'; +import { + Chunk, + Entity, + ExtendedNode, + ExtendedRelationship, + GroupedEntity, + 
UserCredentials, + chatInfoMessage, +} from '../../../types'; import { useEffect, useMemo, useState } from 'react'; import HoverableLink from '../../UI/HoverableLink'; import GraphViewButton from '../../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../../context/UserCredentials'; -import type { Relationship } from '@neo4j-nvl/base'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ReactMarkdown from 'react-markdown'; import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; @@ -23,7 +30,7 @@ const InfoModal: React.FC = ({ sources, model, total_tokens, re const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); + const [relationships, setRelationships] = useState([]); const [chunks, setChunks] = useState([]); const parseEntity = (entity: Entity) => { const { labels, properties } = entity; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 5479509a1..ca407b4bb 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,6 +1,23 @@ -import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, Typography } from '@neo4j-ndl/react'; +import { + Banner, + Dialog, + Flex, + IconButton, + IconButtonArray, + LoadingSpinner, + TextInput, + Typography, + useDebounce, +} from '@neo4j-ndl/react'; import { useCallback, useEffect, useRef, useState } from 'react'; -import { ExtendedNode, GraphType, GraphViewModalProps, Scheme, UserCredentials } from '../../types'; +import { + ExtendedNode, + ExtendedRelationship, + GraphType, + GraphViewModalProps, + Scheme, + UserCredentials, +} from '../../types'; import { InteractiveNvlWrapper } from '@neo4j-nvl/react'; import NVL from '@neo4j-nvl/base'; import type { Node, 
Relationship } from '@neo4j-nvl/base'; @@ -9,16 +26,26 @@ import { ArrowPathIconOutline, DragIcon, FitToScreenIcon, + MagnifyingGlassIconOutline, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; import IconButtonWithToolTip from '../UI/IconButtonToolTip'; -import { filterData, processGraphData } from '../../utils/Utils'; +import { filterData, processGraphData, sortAlphabetically } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; import { LegendsChip } from './LegendsChip'; import graphQueryAPI from '../../services/GraphQuery'; -import { graphLabels, intitalGraphType, mouseEventCallbacks, nvlOptions, queryMap } from '../../utils/Constants'; +import { + graphLabels, + intitalGraphType, + mouseEventCallbacks, + nvlOptions, + queryMap, + RESULT_STEP_SIZE, +} from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; +import { ShowAll } from '../UI/ShowAll'; + const GraphViewModal: React.FunctionComponent = ({ open, inspectedName, @@ -40,6 +67,10 @@ const GraphViewModal: React.FunctionComponent = ({ const { userCredentials } = useCredentials(); const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); + const [searchQuery, setSearchQuery] = useState(''); + const debouncedQuery = useDebounce(searchQuery, 300); + + // the checkbox selection const handleCheckboxChange = (graph: GraphType) => { const currentIndex = graphType.indexOf(graph); const newGraphSelected = [...graphType]; @@ -50,18 +81,28 @@ const GraphViewModal: React.FunctionComponent = ({ newGraphSelected.splice(currentIndex, 1); initGraph(newGraphSelected, allNodes, allRelationships, scheme); } + setSearchQuery(''); setGraphType(newGraphSelected); }; + const nodeCount = (nodes: ExtendedNode[], label: string): number => { + return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; + }; + + const relationshipCount = (relationships: 
ExtendedRelationship[], label: string): number => { + return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => i.id))].length; + }; + const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; + // fit graph to original position const handleZoomToFit = () => { nvlRef.current?.fit( allNodes.map((node) => node.id), @@ -75,7 +116,9 @@ const GraphViewModal: React.FunctionComponent = ({ handleZoomToFit(); }, 10); return () => { - nvlRef.current?.destroy(); + if (nvlRef.current) { + nvlRef.current?.destroy(); + } setGraphType(intitalGraphType); clearTimeout(timeoutId); setScheme({}); @@ -83,6 +126,7 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships([]); setAllNodes([]); setAllRelationships([]); + setSearchQuery(''); }; }, []); @@ -91,10 +135,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? 
'']); return nodeRelationshipData; } catch (error: any) { @@ -151,6 +195,64 @@ const GraphViewModal: React.FunctionComponent = ({ } }, [open]); + // The search and update nodes + const handleSearch = useCallback((value: string) => { + const query = value.toLowerCase(); + const updatedNodes = nodes.map((node) => { + if (query === '') { + return { + ...node, + activated: false, + selected: false, + size: graphLabels.nodeSize + }; + } + const { id, properties, caption } = node; + const propertiesMatch = properties?.id?.toLowerCase().includes(query); + const match = id.toLowerCase().includes(query) || propertiesMatch || caption?.toLowerCase().includes(query); + return { + ...node, + activated: match, + selected: match, + size: (match && viewPoint === graphLabels.showGraphView) ? 100 : (match && viewPoint !== graphLabels.showGraphView) ? 50 : graphLabels.nodeSize + }; + + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + activated: false, + selected: false, + }; + }); + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }, [nodes]); + + useEffect(() => { + handleSearch(debouncedQuery) + }, [debouncedQuery]) + + const initGraph = ( + graphType: GraphType[], + finalNodes: ExtendedNode[], + finalRels: Relationship[], + schemeVal: Scheme + ) => { + if (allNodes.length > 0 && allRelationships.length > 0) { + const { filteredNodes, filteredRelations, filteredScheme } = filterData( + graphType, + finalNodes ?? [], + finalRels ?? 
[], + schemeVal + ); + setNodes(filteredNodes); + setRelationships(filteredRelations); + setNewScheme(filteredScheme); + } + }; + // Unmounting the component if (!open) { return <>; @@ -201,10 +303,11 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships([]); setAllNodes([]); setAllRelationships([]); + setSearchQuery(''); }; // sort the legends in with Chunk and Document always the first two values - const legendCheck = Object.keys(newScheme).sort((a, b) => { + const nodeCheck = Object.keys(newScheme).sort((a, b) => { if (a === graphLabels.document || a === graphLabels.chunk) { return -1; } else if (b === graphLabels.document || b === graphLabels.chunk) { @@ -213,23 +316,70 @@ const GraphViewModal: React.FunctionComponent = ({ return a.localeCompare(b); }); - const initGraph = ( - graphType: GraphType[], - finalNodes: ExtendedNode[], - finalRels: Relationship[], - schemeVal: Scheme - ) => { - if (allNodes.length > 0 && allRelationships.length > 0) { - const { filteredNodes, filteredRelations, filteredScheme } = filterData( - graphType, - finalNodes ?? [], - finalRels ?? 
[], - schemeVal - ); - setNodes(filteredNodes); - setRelationships(filteredRelations); - setNewScheme(filteredScheme); + // get sorted relationships + const relationshipsSorted = relationships.sort(sortAlphabetically); + + // To get the relationship count + const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( + relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: Relationship) => { + const key = relType.caption || ''; + if (!acc[key]) { + acc[key] = { ...relType, count: 0 }; + } + + acc[key]!.count += relationshipCount(relationships as ExtendedRelationship[], key); + return acc; + }, {}) + ); + + // On Node Click, highlighting the nodes and deactivating any active relationships + const handleNodeClick = (nodeLabel: string) => { + const updatedNodes = nodes.map((node) => { + const isActive = node.labels.includes(nodeLabel); + return { + ...node, + activated: isActive, + selected: isActive, + size: (isActive && viewPoint === graphLabels.showGraphView) ? 100 : (isActive && viewPoint !== graphLabels.showGraphView) ? 
50 : graphLabels.nodeSize + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + activated: false, + selected: false, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); } + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }; + // On Relationship Legend Click, highlight the relationships and deactivating any active nodes + const handleRelationshipClick = (nodeLabel: string) => { + const updatedRelations = relationships.map((rel) => { + return { + ...rel, + activated: rel?.caption?.includes(nodeLabel), + selected: rel?.caption?.includes(nodeLabel), + }; + }); + // // deactivating any active nodes + const updatedNodes = nodes.map((node) => { + return { + ...node, + activated: false, + selected: false, + size: graphLabels.nodeSize + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setRelationships(updatedRelations); + setNodes(updatedNodes); }; return ( @@ -334,17 +484,79 @@ const GraphViewModal: React.FunctionComponent = ({ handleClasses={{ left: 'ml-1' }} >
- - {graphLabels.resultOverview} - - {graphLabels.totalNodes} ({nodes.length}) - - -
- {legendCheck.map((key, index) => ( - - ))} -
+ {nodeCheck.length > 0 && ( + <> + + {graphLabels.resultOverview} +
+ { + setSearchQuery(e.target.value); + }} + placeholder='Search On Node Properties' + fluid={true} + leftIcon={ + + + + } + /> +
+ + {graphLabels.totalNodes} ({nodes.length}) + +
+
+ + {nodeCheck.map((nodeLabel, index) => ( + handleNodeClick(nodeLabel)} + /> + ))} + +
+ + )} + {relationshipsSorted.length > 0 && ( + <> + + + {graphLabels.totalRelationships} ({relationships.length}) + + +
+ + {groupedAndSortedRelationships.map((relType, index) => ( + handleRelationshipClick(relType.caption || '')} + /> + ))} + +
+ + )}
diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 7d2de8b74..2fdc16df2 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,12 +1,14 @@ -import { useMemo } from 'react'; import { LegendChipProps } from '../../types'; import Legend from '../UI/Legend'; -export const LegendsChip: React.FunctionComponent = ({ scheme, title, nodes }) => { - const chunkcount = useMemo( - () => [...new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id))].length, - [] +export const LegendsChip: React.FunctionComponent = ({ + scheme, + label, + type, + count, + onClick, +}) => { + return ( + ); - - return ; }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index bf8345550..36fcff3d1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? 
{ ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; @@ -160,7 +160,7 @@ export default function DeduplicationTab() { return ( {info.getValue().map((l, index) => ( - + ))} ); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 9cdf239dc..f836c9e87 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -111,7 +111,7 @@ export default function DeletePopUpForOrphanNodes({ return ( {info.getValue().map((l, index) => ( - + ))} ); diff --git a/frontend/src/components/UI/Legend.tsx b/frontend/src/components/UI/Legend.tsx index d798ff4f4..a3c806669 100644 --- a/frontend/src/components/UI/Legend.tsx +++ b/frontend/src/components/UI/Legend.tsx @@ -3,16 +3,20 @@ import { GraphLabel } from '@neo4j-ndl/react'; export default function Legend({ bgColor, title, - chunkCount, + count, + type, + onClick, }: { bgColor: string; title: string; - chunkCount?: number; + count?: number; + type: 'node' | 'relationship' | 'propertyKey'; + tabIndex?: number; + onClick?: (e: React.MouseEvent) => void; }) { return ( - - {title} - {chunkCount && `(${chunkCount})`} + + {title} {count !== undefined && `(${count})`} ); } diff --git a/frontend/src/components/UI/ShowAll.tsx b/frontend/src/components/UI/ShowAll.tsx new file mode 100644 index 000000000..726146723 --- /dev/null +++ b/frontend/src/components/UI/ShowAll.tsx @@ -0,0 +1,38 @@ +import { Button } from '@neo4j-ndl/react'; +import type { ReactNode } from 'react'; +import { useState } from 'react'; + +// import { ButtonGroup } from '../button-group/button-group'; + +type ShowAllProps = { + initiallyShown: number; + /* pass thunk to enable rendering only shown components */ + children: ((() => ReactNode) | ReactNode)[]; 
+ ariaLabel?: string; +}; +const isThunkComponent = (t: (() => ReactNode) | ReactNode): t is () => ReactNode => typeof t === 'function'; + +export function ShowAll({ initiallyShown, children }: ShowAllProps) { + const [expanded, setExpanded] = useState(false); + const toggleExpanded = () => setExpanded((e) => !e); + const itemCount = children.length; + const controlsNeeded = itemCount > initiallyShown; + const shown = expanded ? itemCount : initiallyShown; + const leftToShow = itemCount - shown; + + if (itemCount === 0) { + return null; + } + + const currentChildren = children.slice(0, shown).map((c) => (isThunkComponent(c) ? c() : c)); + return ( + <> +
{currentChildren}
+ {controlsNeeded && ( + + )} + + ); +} diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 8e05f632b..5bf076d10 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -259,7 +259,7 @@ export interface GraphViewModalProps { setGraphViewOpen: Dispatch>; viewPoint: string; nodeValues?: ExtendedNode[]; - relationshipValues?: Relationship[]; + relationshipValues?: ExtendedRelationship[]; selectedRows?: CustomFile[] | undefined; } @@ -344,13 +344,13 @@ export type alertStateType = { export type Scheme = Record; export type LabelCount = Record; -interface NodeType extends Partial { - labels?: string[]; -} + export interface LegendChipProps { scheme: Scheme; - title: string; - nodes: NodeType[]; + label: string; + type: 'node' | 'relationship' | 'propertyKey'; + count: number; + onClick: (e: React.MouseEvent) => void; } export interface FileContextProviderProps { children: ReactNode; @@ -578,29 +578,6 @@ export type GraphStatsLabels = Record< } >; -type NodeStyling = { - backgroundColor: string; - borderColor: string; - textColor: string; - caption: string; - diameter: string; -}; - -type RelationStyling = { - fontSize: string; - lineColor: string; - textColorExternal: string; - textColorInternal: string; - caption: string; - padding: string; - width: string; -}; - -export type GraphStyling = { - node: Record>; - relationship: Record>; -}; - export interface ExtendedNode extends Node { labels: string[]; properties: { @@ -610,7 +587,7 @@ export interface ExtendedNode extends Node { } export interface ExtendedRelationship extends Relationship { - labels: string[]; + count: number; } export interface connectionState { openPopUp: boolean; @@ -655,7 +632,7 @@ export interface S3File { } export interface GraphViewButtonProps { nodeValues?: ExtendedNode[]; - relationshipValues?: Relationship[]; + relationshipValues?: ExtendedRelationship[]; } export interface DrawerChatbotProps { isExpanded: boolean; diff --git a/frontend/src/utils/Constants.ts 
b/frontend/src/utils/Constants.ts index b4781b929..33e1d1a04 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -231,4 +231,8 @@ export const graphLabels = { totalNodes: 'Total Nodes', noEntities: 'No Entities Found', selectCheckbox: 'Select atleast one checkbox for graph view', + totalRelationships: 'Total Relationships', + nodeSize: 30 }; + +export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 9745993f6..0b8b05841 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -1,6 +1,6 @@ import { calcWordColor } from '@neo4j-devtools/word-color'; import type { Relationship } from '@neo4j-nvl/base'; -import { Entity, ExtendedNode, GraphType, Messages, Scheme } from '../types'; +import { Entity, ExtendedNode, ExtendedRelationship, GraphType, Messages, Scheme } from '../types'; // Get the Url export const url = () => { @@ -130,7 +130,7 @@ export function extractPdfFileName(url: string): string { return decodedFileName; } -export const processGraphData = (neoNodes: ExtendedNode[], neoRels: Relationship[]) => { +export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRelationship[]) => { const schemeVal: Scheme = {}; let iterator = 0; const labels: string[] = neoNodes.map((f: any) => f.labels); @@ -239,3 +239,9 @@ export const parseEntity = (entity: Entity) => { export const titleCheck = (title: string) => { return title === 'Chunk' || title === 'Document'; }; + +export const sortAlphabetically = (a: Relationship, b: Relationship) => { + const captionOne = a.caption?.toLowerCase() || ''; + const captionTwo = b.caption?.toLowerCase() || ''; + return captionOne.localeCompare(captionTwo); +}; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index c91c34832..3915845ef 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -6614,4 +6614,4 @@ yocto-queue@^0.1.0: zwitch@^2.0.0: version "2.0.4" resolved 
"https://registry.yarnpkg.com/zwitch/-/zwitch-2.0.4.tgz#c827d4b0acb76fc3e685a4c6ec2902d51070e9d7" - integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A== + integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A== \ No newline at end of file From 45751980adc8ffb6c03df95c97f8b9550f128b9a Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:28:11 +0530 Subject: [PATCH 014/292] changed chat mode names (#702) --- backend/src/QA_integration_new.py | 4 ++-- frontend/src/utils/Constants.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 5a2baa599..9ab42a497 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -41,7 +41,7 @@ def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - if mode == "hybrid" or mode == "hybrid+graph": + if mode == "fulltext" or mode == "fulltext+graph": neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, @@ -374,7 +374,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode): "user": "chatbot" } return result - elif mode == "vector" or mode == "hybrid": + elif mode == "vector" or mode == "fulltext": retrieval_query = VECTOR_SEARCH_QUERY else: retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 33e1d1a04..ee2296a3a 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o-mini') export const chatModes = 
process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'hybrid', 'hybrid+graph']; + : ['vector', 'graph', 'graph+vector', 'fulltext', 'fulltext+graph']; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; From a6ee3451aaf62700d393a16b8809d59303a8d345 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:58:28 +0000 Subject: [PATCH 015/292] env changes --- README.md | 2 +- example.env | 1 - frontend/Dockerfile | 3 +-- frontend/example.env | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 800721ec3..e0b59346d 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ Allow unauthenticated request : Yes | VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A | VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A | VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app | -| VITE_TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing | +| VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | diff --git a/example.env b/example.env index b50e3e985..ed33101fb 100644 --- a/example.env +++ b/example.env @@ -30,7 +30,6 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL= VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3" VITE_ENV="DEV" -VITE_TIME_PER_CHUNK=4 VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 VITE_GOOGLE_CLIENT_ID="" diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 7a31d5bcf..c3a7c1c82 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -6,7 +6,6 @@ ARG VITE_REACT_APP_SOURCES="" ARG VITE_LLM_MODELS="" ARG VITE_GOOGLE_CLIENT_ID="" ARG VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" -ARG VITE_TIME_PER_CHUNK=4 ARG VITE_TIME_PER_PAGE=50 ARG VITE_LARGE_FILE_SIZE=5242880 ARG VITE_CHUNK_SIZE=5242880 @@ -23,8 +22,8 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \ VITE_BLOOM_URL=$VITE_BLOOM_URL \ - VITE_TIME_PER_CHUNK=$VITE_TIME_PER_CHUNK \ VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \ + VITE_TIME_PER_PAGE=$VITE_TIME_PER_PAGE \ VITE_ENV=$VITE_ENV \ VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \ VITE_CHAT_MODES=$VITE_CHAT_MODES \ diff --git a/frontend/example.env b/frontend/example.env index 05b8cdf60..63bd3e7c3 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -3,7 +3,6 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL= VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" VITE_ENV="DEV" -VITE_TIME_PER_CHUNK=4 VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 VITE_LARGE_FILE_SIZE=5242880 From dc351a074d3bc56d9662a844796512febd627a3b Mon Sep 17 00:00:00 2001 From: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 22 Aug 2024 06:32:43 +0000 Subject: [PATCH 016/292] used axios instance for network calls --- frontend/src/API/Index.ts | 7 +++++++ frontend/src/services/CancelAPI.ts | 5 ++--- frontend/src/services/ChunkEntitiesInfo.ts | 5 ++--- frontend/src/services/CommonAPI.ts | 5 +++-- frontend/src/services/ConnectAPI.ts | 5 ++--- frontend/src/services/DeleteFiles.ts | 5 ++--- frontend/src/services/DeleteOrphanNodes.ts | 5 ++--- frontend/src/services/GetDuplicateNodes.ts | 5 ++--- frontend/src/services/GetFiles.ts | 7 +++---- frontend/src/services/GetNodeLabelsRelTypes.ts | 5 ++--- frontend/src/services/GetOrphanNodes.ts | 5 ++--- frontend/src/services/GraphQuery.ts | 5 ++--- frontend/src/services/HealthStatus.ts | 7 +++---- frontend/src/services/MergeDuplicateEntities.ts | 5 ++--- frontend/src/services/PollingAPI.ts | 7 +++---- frontend/src/services/PostProcessing.ts | 5 ++--- frontend/src/services/QnaAPI.ts | 7 +++---- frontend/src/services/SchemaFromTextAPI.ts | 5 ++--- frontend/src/services/URLScan.ts | 5 ++--- frontend/src/services/vectorIndexCreation.ts | 5 ++--- 20 files changed, 50 insertions(+), 60 deletions(-) create mode 100644 frontend/src/API/Index.ts diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts new file mode 100644 index 000000000..d086970e4 --- /dev/null +++ b/frontend/src/API/Index.ts @@ -0,0 +1,7 @@ +import axios from 'axios' +import { url } from '../utils/Utils' + +const api=axios.create({ + baseURL:url() +}) +export default api \ No newline at end of file diff --git a/frontend/src/services/CancelAPI.ts b/frontend/src/services/CancelAPI.ts index a162bed83..de3ea9ba0 100644 --- a/frontend/src/services/CancelAPI.ts +++ b/frontend/src/services/CancelAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials, commonserverresponse } from '../types'; +import api from '../API/Index'; const cancelAPI = async (filenames: 
string[], source_types: string[]) => { try { @@ -14,7 +13,7 @@ const cancelAPI = async (filenames: string[], source_types: string[]) => { } formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); - const response = await axios.post(`${url()}/cancelled_job`, formData); + const response = await api.post(`/cancelled_job`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index 3b4323197..aa133c815 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ChatInfo_APIResponse, UserCredentials } from '../types'; +import api from '../API/Index'; const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: string) => { try { @@ -10,7 +9,7 @@ const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: str formData.append('password', userCredentials?.password ?? 
''); formData.append('chunk_ids', chunk_ids); - const response: ChatInfo_APIResponse = await axios.post(`${url()}/chunk_entities`, formData, { + const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/CommonAPI.ts b/frontend/src/services/CommonAPI.ts index 78d324248..bbae83032 100644 --- a/frontend/src/services/CommonAPI.ts +++ b/frontend/src/services/CommonAPI.ts @@ -1,5 +1,6 @@ -import axios, { AxiosResponse, Method } from 'axios'; +import { AxiosResponse, Method } from 'axios'; import { UserCredentials, FormDataParams } from '../types'; +import api from '../API/Index'; // API Call const apiCall = async ( @@ -16,7 +17,7 @@ const apiCall = async ( for (const key in additionalParams) { formData.append(key, additionalParams[key]); } - const response: AxiosResponse = await axios({ + const response: AxiosResponse = await api({ method: method, url: url, data: formData, diff --git a/frontend/src/services/ConnectAPI.ts b/frontend/src/services/ConnectAPI.ts index 73d997b1e..026c41c44 100644 --- a/frontend/src/services/ConnectAPI.ts +++ b/frontend/src/services/ConnectAPI.ts @@ -1,5 +1,4 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; +import api from '../API/Index'; const connectAPI = async (connectionURI: string, username: string, password: string, database: string) => { try { @@ -8,7 +7,7 @@ const connectAPI = async (connectionURI: string, username: string, password: str formData.append('database', database ?? ''); formData.append('userName', username ?? ''); formData.append('password', password ?? 
''); - const response = await axios.post(`${url()}/connect`, formData, { + const response = await api.post(`/connect`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/DeleteFiles.ts b/frontend/src/services/DeleteFiles.ts index 36ae28cbd..a86ef187e 100644 --- a/frontend/src/services/DeleteFiles.ts +++ b/frontend/src/services/DeleteFiles.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { CustomFile, UserCredentials } from '../types'; +import api from '../API/Index'; const deleteAPI = async (userCredentials: UserCredentials, selectedFiles: CustomFile[], deleteEntities: boolean) => { try { @@ -14,7 +13,7 @@ const deleteAPI = async (userCredentials: UserCredentials, selectedFiles: Custom formData.append('deleteEntities', JSON.stringify(deleteEntities)); formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); - const response = await axios.post(`${url()}/delete_document_and_entities`, formData); + const response = await api.post(`/delete_document_and_entities`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/DeleteOrphanNodes.ts b/frontend/src/services/DeleteOrphanNodes.ts index 135dfcdeb..2cebc572c 100644 --- a/frontend/src/services/DeleteOrphanNodes.ts +++ b/frontend/src/services/DeleteOrphanNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: string[]) => { try { @@ -10,7 +9,7 @@ const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('unconnected_entities_list', JSON.stringify(selectedNodes)); - const response = await axios.post(`${url()}/delete_unconnected_nodes`, formData); + const response = await api.post(`/delete_unconnected_nodes`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/GetDuplicateNodes.ts b/frontend/src/services/GetDuplicateNodes.ts index 3cb27a970..b7ea0c426 100644 --- a/frontend/src/services/GetDuplicateNodes.ts +++ b/frontend/src/services/GetDuplicateNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { duplicateNodesData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getDuplicateNodes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getDuplicateNodes = async (userCredentials: UserCredentials) => { formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); try { - const response = await axios.post(`${url()}/get_duplicate_nodes`, formData); + const response = await api.post(`/get_duplicate_nodes`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GetFiles.ts b/frontend/src/services/GetFiles.ts index fd8d60fba..0744ff047 100644 --- a/frontend/src/services/GetFiles.ts +++ b/frontend/src/services/GetFiles.ts @@ -1,12 +1,11 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { SourceListServerData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getSourceNodes = async (userCredentials: UserCredentials) => { try { const encodedstr = btoa(userCredentials.password); - const response = await axios.get( - `${url()}/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${ + const response = await api.get( + `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${ userCredentials.userName }&password=${encodedstr}` ); diff --git a/frontend/src/services/GetNodeLabelsRelTypes.ts b/frontend/src/services/GetNodeLabelsRelTypes.ts index acc05f267..8c7345c2a 100644 --- a/frontend/src/services/GetNodeLabelsRelTypes.ts +++ b/frontend/src/services/GetNodeLabelsRelTypes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ServerData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getNodeLabelsAndRelTypes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getNodeLabelsAndRelTypes = async (userCredentials: UserCredentials) formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); try { - const response = await axios.post(`${url()}/schema`, formData); + const response = await api.post(`/schema`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GetOrphanNodes.ts b/frontend/src/services/GetOrphanNodes.ts index 70cfe8cda..3578214d1 100644 --- a/frontend/src/services/GetOrphanNodes.ts +++ b/frontend/src/services/GetOrphanNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { OrphanNodeResponse, UserCredentials } from '../types'; +import api from '../API/Index'; export const getOrphanNodes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getOrphanNodes = async (userCredentials: UserCredentials) => { formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); try { - const response = await axios.post(`${url()}/get_unconnected_nodes_list`, formData); + const response = await api.post(`/get_unconnected_nodes_list`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GraphQuery.ts b/frontend/src/services/GraphQuery.ts index 55d22a5ee..f792f4aec 100644 --- a/frontend/src/services/GraphQuery.ts +++ b/frontend/src/services/GraphQuery.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const graphQueryAPI = async ( userCredentials: UserCredentials, @@ -16,7 +15,7 @@ const graphQueryAPI = async ( formData.append('query_type', query_type ?? 
'entities'); formData.append('document_names', JSON.stringify(document_names)); - const response = await axios.post(`${url()}/graph_query`, formData, { + const response = await api.post(`/graph_query`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/HealthStatus.ts b/frontend/src/services/HealthStatus.ts index 69a77f0fe..a59badbe0 100644 --- a/frontend/src/services/HealthStatus.ts +++ b/frontend/src/services/HealthStatus.ts @@ -1,9 +1,8 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; +import api from '../API/Index'; const healthStatus = async () => { try { - const healthUrl = `${url()}/health`; - const response = await axios.get(healthUrl); + const healthUrl = `/health`; + const response = await api.get(healthUrl); return response; } catch (error) { console.log('API status error', error); diff --git a/frontend/src/services/MergeDuplicateEntities.ts b/frontend/src/services/MergeDuplicateEntities.ts index ee4e0fffb..1fdb9b387 100644 --- a/frontend/src/services/MergeDuplicateEntities.ts +++ b/frontend/src/services/MergeDuplicateEntities.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { commonserverresponse, selectedDuplicateNodes, UserCredentials } from '../types'; +import api from '../API/Index'; const mergeDuplicateNodes = async (userCredentials: UserCredentials, selectedNodes: selectedDuplicateNodes[]) => { try { @@ -10,7 +9,7 @@ const mergeDuplicateNodes = async (userCredentials: UserCredentials, selectedNod formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('duplicate_nodes_list', JSON.stringify(selectedNodes)); - const response = await axios.post(`${url()}/merge_duplicate_nodes`, formData); + const response = await api.post(`/merge_duplicate_nodes`, formData); return response; } catch (error) { console.log('Error Merging the duplicate nodes:', error); diff --git a/frontend/src/services/PollingAPI.ts b/frontend/src/services/PollingAPI.ts index baa2e6080..f14ed0580 100644 --- a/frontend/src/services/PollingAPI.ts +++ b/frontend/src/services/PollingAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { PollingAPI_Response, statusupdate } from '../types'; +import api from '../API/Index'; export default async function subscribe( fileName: string, @@ -19,8 +18,8 @@ export default async function subscribe( while (pollingAttempts < MAX_POLLING_ATTEMPTS) { let currentdelay = delay; - let response: PollingAPI_Response = await axios.get( - `${url()}/document_status/${fileName}?url=${uri}&userName=${username}&password=${encodedstr}&database=${database}` + let response: PollingAPI_Response = await api.get( + `/document_status/${fileName}?url=${uri}&userName=${username}&password=${encodedstr}&database=${database}` ); if (response.data?.file_name?.status === 'Processing') { diff --git a/frontend/src/services/PostProcessing.ts b/frontend/src/services/PostProcessing.ts index 9f45cc6bd..98c94c238 100644 --- a/frontend/src/services/PostProcessing.ts +++ b/frontend/src/services/PostProcessing.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const postProcessing = async (userCredentials: UserCredentials, taskParam: string[]) => { try { @@ -10,7 +9,7 @@ const postProcessing = async (userCredentials: UserCredentials, taskParam: strin formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('tasks', JSON.stringify(taskParam)); - const response = await axios.post(`${url()}/post_processing`, formData, { + const response = await api.post(`/post_processing`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/QnaAPI.ts b/frontend/src/services/QnaAPI.ts index 931618839..78fa240ba 100644 --- a/frontend/src/services/QnaAPI.ts +++ b/frontend/src/services/QnaAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; export const chatBotAPI = async ( userCredentials: UserCredentials, @@ -22,7 +21,7 @@ export const chatBotAPI = async ( formData.append('mode', mode); formData.append('document_names', JSON.stringify(document_names)); const startTime = Date.now(); - const response = await axios.post(`${url()}/chat_bot`, formData, { + const response = await api.post(`/chat_bot`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, @@ -44,7 +43,7 @@ export const clearChatAPI = async (userCredentials: UserCredentials, session_id: formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('session_id', session_id); - const response = await axios.post(`${url()}/clear_chat_bot`, formData, { + const response = await api.post(`/clear_chat_bot`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/SchemaFromTextAPI.ts b/frontend/src/services/SchemaFromTextAPI.ts index 785fb0d68..3d1984ccf 100644 --- a/frontend/src/services/SchemaFromTextAPI.ts +++ b/frontend/src/services/SchemaFromTextAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ScehmaFromText } from '../types'; +import api from '../API/Index'; export const getNodeLabelsAndRelTypesFromText = async (model: string, inputText: string, isSchemaText: boolean) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getNodeLabelsAndRelTypesFromText = async (model: string, inputText: formData.append('is_schema_description_checked', JSON.stringify(isSchemaText)); try { - const response = await axios.post(`${url()}/populate_graph_schema`, formData); + const response = await api.post(`/populate_graph_schema`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/URLScan.ts b/frontend/src/services/URLScan.ts index 58ad37dfb..444022934 100644 --- a/frontend/src/services/URLScan.ts +++ b/frontend/src/services/URLScan.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ScanProps, ServerResponse } from '../types'; +import api from '../API/Index'; const urlScanAPI = async (props: ScanProps) => { try { @@ -46,7 +45,7 @@ const urlScanAPI = async (props: ScanProps) => { formData.append('access_token', props.access_token); } - const response: ServerResponse = await axios.post(`${url()}/url/scan`, formData, { + const response: ServerResponse = await api.post(`/url/scan`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/vectorIndexCreation.ts 
b/frontend/src/services/vectorIndexCreation.ts index accf85659..e89767551 100644 --- a/frontend/src/services/vectorIndexCreation.ts +++ b/frontend/src/services/vectorIndexCreation.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { commonserverresponse, UserCredentials } from '../types'; +import api from '../API/Index'; export const createVectorIndex = async (userCredentials: UserCredentials, isVectorIndexExists: boolean) => { const formData = new FormData(); @@ -10,7 +9,7 @@ export const createVectorIndex = async (userCredentials: UserCredentials, isVect formData.append('password', userCredentials?.password ?? ''); formData.append('isVectorIndexExist', JSON.stringify(isVectorIndexExists)); try { - const response = await axios.post(`${url()}/drop_create_vector_index`, formData); + const response = await api.post(`/drop_create_vector_index`, formData); return response; } catch (error) { console.log(error); From a241c6bcbb64441c886231d72e738cee10f32ec2 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 22 Aug 2024 08:25:56 +0000 Subject: [PATCH 017/292] disabled the toolip when dropdown is open state --- frontend/src/components/Dropdown.tsx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index f046bb8ff..ed7c5fe6c 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,6 +1,6 @@ import { Dropdown, Tip } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; -import { useMemo } from 'react'; +import { useMemo, useReducer } from 'react'; import { capitalize } from '../utils/Utils'; const DropdownComponent: React.FC = ({ @@ -13,6 +13,7 @@ const DropdownComponent: React.FC = ({ isDisabled, value, }) => { + const [disableTooltip, toggleDisableState] = useReducer((state)=>!state,false) const 
handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); }; @@ -20,7 +21,7 @@ const DropdownComponent: React.FC = ({ return ( <>
- + = ({ menuPlacement: 'auto', isDisabled: isDisabled, value: value, + onMenuOpen:()=>{ + toggleDisableState() + }, + onMenuClose:()=>{ + toggleDisableState() + } }} size='medium' fluid From 426906bb5e2c7c2e71721195314c6c4211f55447 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 22 Aug 2024 08:32:39 +0000 Subject: [PATCH 018/292] format fixes + chat mode naming changes --- backend/src/QA_integration_new.py | 2 +- frontend/src/API/Index.ts | 12 +-- .../src/components/ChatBot/ChatModeToggle.tsx | 7 +- frontend/src/components/Dropdown.tsx | 12 +-- .../src/components/Graph/GraphViewModal.tsx | 94 +++++++++++-------- frontend/src/components/Graph/LegendsChip.tsx | 12 +-- frontend/src/components/Layout/PageLayout.tsx | 2 +- .../Popups/DeletePopUp/DeletePopUp.tsx | 2 +- .../Deduplication/index.tsx | 4 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- .../Popups/GraphEnhancementDialog/index.tsx | 2 +- .../Popups/LargeFilePopUp/LargeFilesAlert.tsx | 2 +- frontend/src/components/QuickStarter.tsx | 2 +- frontend/src/services/GetFiles.ts | 4 +- frontend/src/utils/Constants.ts | 4 +- 15 files changed, 85 insertions(+), 78 deletions(-) diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 9ab42a497..eeac78c1e 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -41,7 +41,7 @@ def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - if mode == "fulltext" or mode == "fulltext+graph": + if mode == "fulltext" or mode == "graph + vector + fulltext": neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts index d086970e4..f4ad15cbe 100644 --- a/frontend/src/API/Index.ts +++ b/frontend/src/API/Index.ts @@ 
-1,7 +1,7 @@ -import axios from 'axios' -import { url } from '../utils/Utils' +import axios from 'axios'; +import { url } from '../utils/Utils'; -const api=axios.create({ - baseURL:url() -}) -export default api \ No newline at end of file +const api = axios.create({ + baseURL: url(), +}); +export default api; diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 4c0d54bc7..e82dfea4d 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -31,7 +31,12 @@ export default function ChatModeToggle({ () => chatModes?.map((m) => { return { - title: capitalize(m), + title: m.includes('+') + ? m + .split('+') + .map((s) => capitalize(s)) + .join('+') + : capitalize(m), onClick: () => { setchatMode(m); }, diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index ed7c5fe6c..ba949aec0 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -13,7 +13,7 @@ const DropdownComponent: React.FC = ({ isDisabled, value, }) => { - const [disableTooltip, toggleDisableState] = useReducer((state)=>!state,false) + const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false); const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); }; @@ -50,12 +50,12 @@ const DropdownComponent: React.FC = ({ menuPlacement: 'auto', isDisabled: isDisabled, value: value, - onMenuOpen:()=>{ - toggleDisableState() + onMenuOpen: () => { + toggleDisableState(); + }, + onMenuClose: () => { + toggleDisableState(); }, - onMenuClose:()=>{ - toggleDisableState() - } }} size='medium' fluid diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index ca407b4bb..39438b788 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -97,10 
+97,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -196,43 +196,50 @@ const GraphViewModal: React.FunctionComponent = ({ }, [open]); // The search and update nodes - const handleSearch = useCallback((value: string) => { - const query = value.toLowerCase(); - const updatedNodes = nodes.map((node) => { - if (query === '') { + const handleSearch = useCallback( + (value: string) => { + const query = value.toLowerCase(); + const updatedNodes = nodes.map((node) => { + if (query === '') { + return { + ...node, + activated: false, + selected: false, + size: graphLabels.nodeSize, + }; + } + const { id, properties, caption } = node; + const propertiesMatch = properties?.id?.toLowerCase().includes(query); + const match = id.toLowerCase().includes(query) || propertiesMatch || caption?.toLowerCase().includes(query); return { ...node, + activated: match, + selected: match, + size: + match && viewPoint === graphLabels.showGraphView + ? 100 + : match && viewPoint !== graphLabels.showGraphView + ? 
50 + : graphLabels.nodeSize, + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, activated: false, selected: false, - size: graphLabels.nodeSize }; - } - const { id, properties, caption } = node; - const propertiesMatch = properties?.id?.toLowerCase().includes(query); - const match = id.toLowerCase().includes(query) || propertiesMatch || caption?.toLowerCase().includes(query); - return { - ...node, - activated: match, - selected: match, - size: (match && viewPoint === graphLabels.showGraphView) ? 100 : (match && viewPoint !== graphLabels.showGraphView) ? 50 : graphLabels.nodeSize - }; - - }); - // deactivating any active relationships - const updatedRelationships = relationships.map((rel) => { - return { - ...rel, - activated: false, - selected: false, - }; - }); - setNodes(updatedNodes); - setRelationships(updatedRelationships); - }, [nodes]); + }); + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }, + [nodes] + ); useEffect(() => { - handleSearch(debouncedQuery) - }, [debouncedQuery]) + handleSearch(debouncedQuery); + }, [debouncedQuery]); const initGraph = ( graphType: GraphType[], @@ -340,7 +347,12 @@ const GraphViewModal: React.FunctionComponent = ({ ...node, activated: isActive, selected: isActive, - size: (isActive && viewPoint === graphLabels.showGraphView) ? 100 : (isActive && viewPoint !== graphLabels.showGraphView) ? 50 : graphLabels.nodeSize + size: + isActive && viewPoint === graphLabels.showGraphView + ? 100 + : isActive && viewPoint !== graphLabels.showGraphView + ? 
50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -372,7 +384,7 @@ const GraphViewModal: React.FunctionComponent = ({ ...node, activated: false, selected: false, - size: graphLabels.nodeSize + size: graphLabels.nodeSize, }; }); if (searchQuery !== '') { @@ -568,4 +580,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; \ No newline at end of file +export default GraphViewModal; diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 2fdc16df2..2ab9c7b40 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,14 +1,6 @@ import { LegendChipProps } from '../../types'; import Legend from '../UI/Legend'; -export const LegendsChip: React.FunctionComponent = ({ - scheme, - label, - type, - count, - onClick, -}) => { - return ( - - ); +export const LegendsChip: React.FunctionComponent = ({ scheme, label, type, count, onClick }) => { + return ; }; diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 1a2f755b4..8aca58109 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -175,4 +175,4 @@ export default function PageLayoutNew({ />
); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx index eb1522868..1b1bd58c8 100644 --- a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx +++ b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx @@ -54,4 +54,4 @@ export default function DeletePopUp({ ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 36fcff3d1..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? 
{ ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index f836c9e87..373007e95 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -279,4 +279,4 @@ export default function DeletePopUpForOrphanNodes({ ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index 5e2aaf713..84577c53c 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -126,4 +126,4 @@ export default function GraphEnhancementDialog({ ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index b2eb01a52..26c9cfcda 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -100,4 +100,4 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke ); }; -export default LargeFilesAlert; \ No newline at end of file +export default LargeFilesAlert; diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 29832fee1..db2cba7e7 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -42,4 +42,4 @@ const QuickStarter: React.FunctionComponent = () => {
); }; -export default QuickStarter; \ No newline at end of file +export default QuickStarter; diff --git a/frontend/src/services/GetFiles.ts b/frontend/src/services/GetFiles.ts index 0744ff047..056a9cc05 100644 --- a/frontend/src/services/GetFiles.ts +++ b/frontend/src/services/GetFiles.ts @@ -5,9 +5,7 @@ export const getSourceNodes = async (userCredentials: UserCredentials) => { try { const encodedstr = btoa(userCredentials.password); const response = await api.get( - `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${ - userCredentials.userName - }&password=${encodedstr}` + `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${userCredentials.userName}&password=${encodedstr}` ); return response; } catch (error) { diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index ee2296a3a..4222e8a7b 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o-mini') export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'fulltext', 'fulltext+graph']; + : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext']; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? 
parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; @@ -232,7 +232,7 @@ export const graphLabels = { noEntities: 'No Entities Found', selectCheckbox: 'Select atleast one checkbox for graph view', totalRelationships: 'Total Relationships', - nodeSize: 30 + nodeSize: 30, }; export const RESULT_STEP_SIZE = 25; From 7d0b431728ce642cdbddc9c033b84b0b2c8c92f6 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:50:41 +0000 Subject: [PATCH 019/292] mode added to info model for entities --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 42dad1893..25391b00e 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -175,7 +175,7 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode != 'graph' ? Sources used : <>} - {mode === 'graph+vector' || mode === 'graph' ? Top Entities used : <>} + {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? Top Entities used : <>} {mode === 'graph' && cypher_query?.trim().length ? 
( Generated Cypher Query ) : ( From f2b1e172a70c8dfd63cda55007fa5ed4371e64a0 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:07:54 +0000 Subject: [PATCH 020/292] Issue fixed, List out of index while getting status of dicuement node --- backend/src/main.py | 160 +++++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 77 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index f7dd190ef..16eb0e622 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -277,91 +277,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages create_chunks_obj = CreateChunksofDocument(pages, graph) chunks = create_chunks_obj.split_file_into_chunks() chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) - if result[0]['Status'] != 'Processing': - obj_source_node = sourceNode() - status = "Processing" - obj_source_node.file_name = file_name - obj_source_node.status = status - obj_source_node.total_chunks = len(chunks) - obj_source_node.total_pages = len(pages) - obj_source_node.model = model - logging.info(file_name) - logging.info(obj_source_node) - graphDb_data_Access.update_source_node(obj_source_node) - - logging.info('Update the status as Processing') - update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED')) - # selected_chunks = [] - is_cancelled_status = False - job_status = "Completed" - node_count = 0 - rel_count = 0 - for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): - select_chunks_upto = i+update_graph_chunk_processed - logging.info(f'Selected Chunks upto: {select_chunks_upto}') - if len(chunkId_chunkDoc_list) <= select_chunks_upto: - select_chunks_upto = len(chunkId_chunkDoc_list) - selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] + + if len(result) > 0: + if result[0]['Status'] != 'Processing': + obj_source_node = sourceNode() + status 
= "Processing" + obj_source_node.file_name = file_name + obj_source_node.status = status + obj_source_node.total_chunks = len(chunks) + obj_source_node.total_pages = len(pages) + obj_source_node.model = model + logging.info(file_name) + logging.info(obj_source_node) + graphDb_data_Access.update_source_node(obj_source_node) + + logging.info('Update the status as Processing') + update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED')) + # selected_chunks = [] + is_cancelled_status = False + job_status = "Completed" + node_count = 0 + rel_count = 0 + for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): + select_chunks_upto = i+update_graph_chunk_processed + logging.info(f'Selected Chunks upto: {select_chunks_upto}') + if len(chunkId_chunkDoc_list) <= select_chunks_upto: + select_chunks_upto = len(chunkId_chunkDoc_list) + selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] + result = graphDb_data_Access.get_current_status_document_node(file_name) + is_cancelled_status = result[0]['is_cancelled'] + logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") + if bool(is_cancelled_status) == True: + job_status = "Cancelled" + logging.info('Exit from running loop of processing file') + exit + else: + node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) + end_time = datetime.now() + processed_time = end_time - start_time + + obj_source_node = sourceNode() + obj_source_node.file_name = file_name + obj_source_node.updated_at = end_time + obj_source_node.processing_time = processed_time + obj_source_node.node_count = node_count + obj_source_node.processed_chunk = select_chunks_upto + obj_source_node.relationship_count = rel_count + graphDb_data_Access.update_source_node(obj_source_node) + result = graphDb_data_Access.get_current_status_document_node(file_name) is_cancelled_status = 
result[0]['is_cancelled'] - logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") if bool(is_cancelled_status) == True: - job_status = "Cancelled" - logging.info('Exit from running loop of processing file') - exit - else: - node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) - end_time = datetime.now() - processed_time = end_time - start_time - - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.updated_at = end_time - obj_source_node.processing_time = processed_time - obj_source_node.node_count = node_count - obj_source_node.processed_chunk = select_chunks_upto - obj_source_node.relationship_count = rel_count - graphDb_data_Access.update_source_node(obj_source_node) - - result = graphDb_data_Access.get_current_status_document_node(file_name) - is_cancelled_status = result[0]['is_cancelled'] - if bool(is_cancelled_status) == True: - logging.info(f'Is_cancelled True at the end extraction') - job_status = 'Cancelled' - logging.info(f'Job Status at the end : {job_status}') - end_time = datetime.now() - processed_time = end_time - start_time - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.status = job_status - obj_source_node.processing_time = processed_time + logging.info(f'Is_cancelled True at the end extraction') + job_status = 'Cancelled' + logging.info(f'Job Status at the end : {job_status}') + end_time = datetime.now() + processed_time = end_time - start_time + obj_source_node = sourceNode() + obj_source_node.file_name = file_name + obj_source_node.status = job_status + obj_source_node.processing_time = processed_time - graphDb_data_Access.update_source_node(obj_source_node) - logging.info('Updated the nodeCount and relCount properties in Document node') - logging.info(f'file:{file_name} extraction has been completed') + 
graphDb_data_Access.update_source_node(obj_source_node) + logging.info('Updated the nodeCount and relCount properties in Document node') + logging.info(f'file:{file_name} extraction has been completed') - # merged_file_path have value only when file uploaded from local - - if is_uploaded_from_local: - gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - if gcs_file_cache == 'True': - folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name) - delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) - else: - delete_uploaded_local_file(merged_file_path, file_name) + # merged_file_path have value only when file uploaded from local - return { - "fileName": file_name, - "nodeCount": node_count, - "relationshipCount": rel_count, - "processingTime": round(processed_time.total_seconds(),2), - "status" : job_status, - "model" : model, - "success_count" : 1 - } + if is_uploaded_from_local: + gcs_file_cache = os.environ.get('GCS_FILE_CACHE') + if gcs_file_cache == 'True': + folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name) + delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) + else: + delete_uploaded_local_file(merged_file_path, file_name) + + return { + "fileName": file_name, + "nodeCount": node_count, + "relationshipCount": rel_count, + "processingTime": round(processed_time.total_seconds(),2), + "status" : job_status, + "model" : model, + "success_count" : 1 + } + else: + logging.info('File does not process because it\'s already in Processing status') else: - logging.info('File does not process because it\'s already in Processing status') + error_message = "Unable to get the status of docuemnt node." 
+ logging.error(error_message) + raise Exception(error_message) def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): #create vector index and update chunk node with embedding From 94c493eaef19386e07793f253aee578c54e44509 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:55:36 +0000 Subject: [PATCH 021/292] processing count updated on cancel --- frontend/src/components/Content.tsx | 2 +- frontend/src/components/FileTable.tsx | 7 +++++++ frontend/src/hooks/useSse.tsx | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index ec4fad9bb..a1a1e69b1 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -381,7 +381,7 @@ const Content: React.FC = ({ setextractLoading(false); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); - } else if (queueFiles && !queue.isEmpty()) { + } else if (queueFiles && !queue.isEmpty()&&processingFilesCount { setextractLoading(false); diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 70dff8f1a..8c334a7b4 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -761,6 +761,13 @@ const FileTable = forwardRef((props, ref) => { return curfile; }) ); + setProcessedCount((prev) => { + if (prev == batchSize) { + return batchSize - 1; + } + return prev + 1; + }); + queue.remove(fileName) } else { let errorobj = { error: res.data.error, message: res.data.message, fileName }; throw new Error(JSON.stringify(errorobj)); diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index f8a07f61e..36be24c7b 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx @@ -45,7 +45,7 @@ export default 
function useServerSideEvent( }); }); } - } else if (status === 'Completed' || status === 'Cancelled') { + } else if (status === 'Completed') { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { if (curfile.name == fileName) { From 3ef88b65492aee3c6fd9f1c0d70d4be424bb3869 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:04:19 +0000 Subject: [PATCH 022/292] format fixes --- .../src/components/ChatBot/ChatInfoModal.tsx | 6 +++++- frontend/src/components/Content.tsx | 2 +- frontend/src/components/FileTable.tsx | 2 +- frontend/src/components/Graph/GraphViewModal.tsx | 16 ++++++++-------- .../Deduplication/index.tsx | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 25391b00e..89c15ea72 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -175,7 +175,11 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode != 'graph' ? Sources used : <>} - {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? Top Entities used : <>} + {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? ( + Top Entities used + ) : ( + <> + )} {mode === 'graph' && cypher_query?.trim().length ? 
( Generated Cypher Query ) : ( diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index a1a1e69b1..64945d5d5 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -381,7 +381,7 @@ const Content: React.FC = ({ setextractLoading(false); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); - } else if (queueFiles && !queue.isEmpty()&&processingFilesCount { setextractLoading(false); diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 8c334a7b4..23310eaf4 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -767,7 +767,7 @@ const FileTable = forwardRef((props, ref) => { } return prev + 1; }); - queue.remove(fileName) + queue.remove(fileName); } else { let errorobj = { error: res.data.error, message: res.data.message, fileName }; throw new Error(JSON.stringify(errorobj)); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 0c8153fe0..39438b788 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -97,10 +97,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? 
await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 36fcff3d1..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; From dadaa288c6302e7ba5b00915ba4468dbc568d4d3 Mon Sep 17 00:00:00 2001 From: edenbuaa Date: Tue, 27 Aug 2024 21:56:35 +0800 Subject: [PATCH 023/292] remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) --- example.env | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/example.env b/example.env index ed33101fb..23bcc6e06 100644 --- a/example.env +++ b/example.env @@ -1,27 +1,27 @@ # Mandatory -OPENAI_API_KEY = "" -DIFFBOT_API_KEY = "" +OPENAI_API_KEY="" +DIFFBOT_API_KEY="" # Optional Backend -EMBEDDING_MODEL = "all-MiniLM-L6-v2" -IS_EMBEDDING = "true" -KNN_MIN_SCORE = "0.94" +EMBEDDING_MODEL="all-MiniLM-L6-v2" +IS_EMBEDDING="true" +KNN_MIN_SCORE="0.94" # Enable Gemini (default is False) | Can be False or True -GEMINI_ENABLED = False +GEMINI_ENABLED=False # LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434" 
# Enable Google Cloud logs (default is False) | Can be False or True -GCP_LOG_METRICS_ENABLED = False -NUMBER_OF_CHUNKS_TO_COMBINE = 6 -UPDATE_GRAPH_CHUNKS_PROCESSED = 20 -NEO4J_URI = "neo4j://database:7687" -NEO4J_USERNAME = "neo4j" -NEO4J_PASSWORD = "password" -LANGCHAIN_API_KEY = "" -LANGCHAIN_PROJECT = "" -LANGCHAIN_TRACING_V2 = "true" -LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com" -GCS_FILE_CACHE = False +GCP_LOG_METRICS_ENABLED=False +NUMBER_OF_CHUNKS_TO_COMBINE=6 +UPDATE_GRAPH_CHUNKS_PROCESSED=20 +NEO4J_URI="neo4j://database:7687" +NEO4J_USERNAME="neo4j" +NEO4J_PASSWORD="password" +LANGCHAIN_API_KEY="" +LANGCHAIN_PROJECT="" +LANGCHAIN_TRACING_V2="true" +LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +GCS_FILE_CACHE=False ENTITY_EMBEDDING=True # Optional Frontend From 4c6f676190a79159577112b3cd284c306d9ad146 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:06:45 +0000 Subject: [PATCH 024/292] updated disconnected nodes --- backend/Performance_test.py | 1 + backend/src/main.py | 1 + backend/test_integrationqa.py | 122 ++++++++++++++++++++++++++-------- 3 files changed, 96 insertions(+), 28 deletions(-) diff --git a/backend/Performance_test.py b/backend/Performance_test.py index fc0aee66f..712d3daf1 100644 --- a/backend/Performance_test.py +++ b/backend/Performance_test.py @@ -94,6 +94,7 @@ def performance_main(): for _ in range(CONCURRENT_REQUESTS): futures.append(executor.submit(post_request_chunk)) + # Chatbot request futures # Chatbot request futures # for message in CHATBOT_MESSAGES: # futures.append(executor.submit(chatbot_request, message)) diff --git a/backend/src/main.py b/backend/src/main.py index 16eb0e622..a7d5058a0 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -264,6 +264,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages graphDb_data_Access = graphDBdataAccess(graph) result = 
graphDb_data_Access.get_current_status_document_node(file_name) + print(result) logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] for i in range(0,len(pages)): diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 20f3effb0..6f931058e 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -69,9 +69,7 @@ def test_graph_from_wikipedia(model_name): file_name = "Ram_Mandir" create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - wiki_result = extract_graph_from_file_Wikipedia( - URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '' - ) + wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '') logging.info("Wikipedia test done") print(wiki_result) @@ -85,6 +83,27 @@ def test_graph_from_wikipedia(model_name): return wiki_result +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result + + def test_graph_from_youtube_video(model_name): """Test graph creation from a YouTube video.""" source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' @@ -115,52 +134,99 @@ def test_chatbot_qna(model_name, mode='graph+vector'): try: assert len(QA_n_RAG['message']) > 20 + return QA_n_RAG + print("Success") + except AssertionError as e: + print("Failed ", e) + return QA_n_RAG + +# 
Check the Functionality of Chatbot QnA for mode 'vector' +def test_chatbot_QnA_vector(model_name): + model = model_name + QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector') + + + print(QA_n_RAG_vector) + print(len(QA_n_RAG_vector['message'])) + try: + assert len(QA_n_RAG_vector['message']) > 20 + return QA_n_RAG_vector print("Success") except AssertionError as e: print("Failed: ", e) return QA_n_RAG -def compare_graph_results(results): - """ - Compare graph results across different models. - Add custom logic here to compare graph data, nodes, and relationships. - """ - # Placeholder logic for comparison - print("Comparing results...") - for i in range(len(results) - 1): - result_a = results[i] - result_b = results[i + 1] - if result_a == result_b: - print(f"Result {i} is identical to result {i+1}") +#Get disconnected_nodes list +def disconected_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + if total_nodes['total']>0: + return "True" + else: + return "False" + +#Delete disconnected_nodes list +# def delete_disconected_nodes(): +# #graph = create_graph_database_connection(uri, userName, password, database) +# graphDb_data_Access = graphDBdataAccess(graph) +# result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list) + +#Get Duplicate_nodes +def get_duplicate_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() + if total_nodes['total']>0: + return "True" else: - print(f"Result {i} differs from result {i+1}") + return "False" + print(nodes_list) + print(total_nodes) + +# def compare_graph_results(results): +# """ +# Compare graph results across different models. 
+# Add custom logic here to compare graph data, nodes, and relationships. +# """ +# # Placeholder logic for comparison +# print("Comparing results...") +# for i in range(len(results) - 1): +# result_a = results[i] +# result_b = results[i + 1] +# if result_a == result_b: +# print(f"Result {i} is identical to result {i+1}") +# else: +# print(f"Result {i} differs from result {i+1}") def run_tests(): final_list = [] error_list = [] - models = [ - 'openai-gpt-3.5', 'openai-gpt-4o', 'openai-gpt-4o-mini', 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', 'anthropic_claude_3_5_sonnet', 'fireworks_v3p1_405b', - 'fireworks_llama_v3_70b', 'ollama_llama3', 'bedrock_claude_3_5_sonnet' - ] + models = ['openai-gpt-3.5', 'openai-gpt-4o'] for model_name in models: try: - final_list.append(test_graph_from_file_local(model_name)) + # final_list.append(test_graph_from_file_local(model_name)) final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_graph_from_youtube_video(model_name)) - final_list.append(test_chatbot_qna(model_name)) - final_list.append(test_chatbot_qna(model_name, mode='vector')) - final_list.append(test_chatbot_qna(model_name, mode='hybrid')) + # final_list.append(test_graph_website(model_name)) + # final_list.append(test_graph_from_youtube_video(model_name)) + # final_list.append(test_chatbot_qna(model_name)) + # final_list.append(test_chatbot_qna(model_name, mode='vector')) + # final_list.append(test_chatbot_qna(model_name, mode='hybrid')) except Exception as e: error_list.append((model_name, str(e))) #Compare and log diffrences in graph results - compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - + # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + + dis = disconected_nodes() + dup = get_duplicate_nodes() # Save final results to CSV df = pd.DataFrame(final_list) + print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') + df['disconnected_nodes']=dis + 
df['get_duplicate_nodes']=dup df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) # Save error details to CSV From 568db51c733e5b9cc140ea0d6aeaf0d63b842f52 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:11:14 +0000 Subject: [PATCH 025/292] updated disconnected nodes --- backend/test_integrationqa.py | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 6f931058e..b2222ebea 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -126,7 +126,7 @@ def test_graph_from_youtube_video(model_name): return youtube_result -def test_chatbot_qna(model_name, mode='graph+vector'): +def test_chatbot_qna(model_name, mode='vector'): """Test chatbot QnA functionality for different modes.""" QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) print(QA_n_RAG) @@ -139,24 +139,7 @@ def test_chatbot_qna(model_name, mode='graph+vector'): except AssertionError as e: print("Failed ", e) return QA_n_RAG - -# Check the Functionality of Chatbot QnA for mode 'vector' -def test_chatbot_QnA_vector(model_name): - model = model_name - QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector') - - - print(QA_n_RAG_vector) - print(len(QA_n_RAG_vector['message'])) - try: - assert len(QA_n_RAG_vector['message']) > 20 - return QA_n_RAG_vector - print("Success") - except AssertionError as e: - print("Failed: ", e) - - return QA_n_RAG - + #Get disconnected_nodes list def disconected_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) @@ -182,8 +165,6 @@ def get_duplicate_nodes(): return "True" else: return "False" - print(nodes_list) - print(total_nodes) # def compare_graph_results(results): # """ @@ -208,12 +189,12 @@ def run_tests(): for model_name in models: try: # 
final_list.append(test_graph_from_file_local(model_name)) - final_list.append(test_graph_from_wikipedia(model_name)) - # final_list.append(test_graph_website(model_name)) + # final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_graph_website(model_name)) # final_list.append(test_graph_from_youtube_video(model_name)) - # final_list.append(test_chatbot_qna(model_name)) + final_list.append(test_chatbot_qna(model_name)) # final_list.append(test_chatbot_qna(model_name, mode='vector')) - # final_list.append(test_chatbot_qna(model_name, mode='hybrid')) + # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) except Exception as e: error_list.append((model_name, str(e))) #Compare and log diffrences in graph results From 501ec6b741622282bd13c37d0713a2dfaccda78a Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 28 Aug 2024 05:02:20 +0000 Subject: [PATCH 026/292] fix: Processed count update on failed condition --- frontend/src/components/Content.tsx | 66 +++++++++++++++++++---------- frontend/src/hooks/useSse.tsx | 3 +- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 64945d5d5..039c04106 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -31,6 +31,8 @@ import FallBackDialog from './UI/FallBackDialog'; import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; +import axios from 'axios'; + const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); let afterFirstRender = false; @@ -180,6 +182,7 @@ const Content: React.FC = ({ }; const extractHandler = async (fileItem: CustomFile, uid: string) => 
{ + queue.remove(fileItem.name as string); try { setFilesData((prevfiles) => prevfiles.map((curfile) => { @@ -252,28 +255,45 @@ const Content: React.FC = ({ }); } } catch (err: any) { - const error = JSON.parse(err.message); - if (Object.keys(error).includes('fileName')) { - const { message } = error; - const { fileName } = error; - const errorMessage = error.message; - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: message, - }); - setFilesData((prevfiles) => - prevfiles.map((curfile) => { - if (curfile.name == fileName) { - return { - ...curfile, - status: 'Failed', - errorMessage, - }; - } - return curfile; - }) - ); + if (err instanceof Error) { + try { + const error = JSON.parse(err.message); + if (Object.keys(error).includes('fileName')) { + setProcessedCount((prev) => { + if (prev == batchSize) { + return batchSize - 1; + } + return prev + 1; + }); + const { message, fileName } = error; + queue.remove(fileName); + const errorMessage = error.message; + setalertDetails({ + showAlert: true, + alertType: 'error', + alertMessage: message, + }); + setFilesData((prevfiles) => + prevfiles.map((curfile) => { + if (curfile.name == fileName) { + return { ...curfile, status: 'Failed', errorMessage }; + } + return curfile; + }) + ); + } else { + console.error('Unexpected error format:', error); + } + } catch (parseError) { + if (axios.isAxiosError(err)) { + const axiosErrorMessage = err.response?.data?.message || err.message; + console.error('Axios error occurred:', axiosErrorMessage); + } else { + console.error('An unexpected error occurred:', err.message); + } + } + } else { + console.error('An unknown error occurred:', err); } } }; @@ -839,4 +859,4 @@ const Content: React.FC = ({ ); }; -export default Content; +export default Content; \ No newline at end of file diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index 36be24c7b..8b063751c 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx 
@@ -7,7 +7,7 @@ export default function useServerSideEvent( alertHandler: (inMinutes: boolean, minutes: number, filename: string) => void, errorHandler: (filename: string) => void ) { - const { setFilesData, setProcessedCount, queue } = useFileContext(); + const { setFilesData, setProcessedCount } = useFileContext(); function updateStatusForLargeFiles(eventSourceRes: eventResponsetypes) { const { fileName, @@ -67,7 +67,6 @@ export default function useServerSideEvent( } return prev + 1; }); - queue.remove(fileName); } else if (eventSourceRes.status === 'Failed') { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { From 9941474aa0f213bd0afebcece2a70e4b8d275e0c Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:23:03 +0000 Subject: [PATCH 027/292] added disconnected and up nodes --- backend/test_integrationqa.py | 42 ++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index b2222ebea..4d0a5e1ea 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -1,3 +1,4 @@ +import json import os import shutil import logging @@ -145,16 +146,24 @@ def disconected_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + print(nodes_list[0]["e"]["elementId"]) + + status = "False" + if total_nodes['total']>0: - return "True" + status = "True" else: - return "False" + status = "False" + + return nodes_list[0]["e"]["elementId"], status #Delete disconnected_nodes list -# def delete_disconected_nodes(): -# #graph = create_graph_database_connection(uri, userName, password, database) -# graphDb_data_Access = graphDBdataAccess(graph) -# result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list) +def 
delete_disconected_nodes(lst_element_id): + print(f'disconnect elementid list {lst_element_id}') + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + print(f'delete disconnect api result {result}') #Get Duplicate_nodes def get_duplicate_nodes(): @@ -166,6 +175,11 @@ def get_duplicate_nodes(): else: return "False" +#Merge Duplicate_nodes +def test_merge_duplicate_nodes(): + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list) + # def compare_graph_results(results): # """ # Compare graph results across different models. @@ -188,25 +202,27 @@ def run_tests(): for model_name in models: try: - # final_list.append(test_graph_from_file_local(model_name)) - # final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_graph_from_file_local(model_name)) + final_list.append(test_graph_from_wikipedia(model_name)) final_list.append(test_graph_website(model_name)) - # final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) final_list.append(test_chatbot_qna(model_name)) - # final_list.append(test_chatbot_qna(model_name, mode='vector')) - # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + final_list.append(test_chatbot_qna(model_name, mode='vector')) + final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) except Exception as e: error_list.append((model_name, str(e))) #Compare and log diffrences in graph results # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - dis = disconected_nodes() + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delete_disconected_nodes(lst_element_id) dup = get_duplicate_nodes() # Save final results to CSV df = 
pd.DataFrame(final_list) print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') - df['disconnected_nodes']=dis + df['disconnected_nodes']=dis_status df['get_duplicate_nodes']=dup df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) From 8ae3b99d72edd456536e1a2b67a1f5d5f1b7d188 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 28 Aug 2024 17:15:49 +0000 Subject: [PATCH 028/292] removed __Entity__ labels --- backend/src/communities.py | 85 ++++++++++++++++++++++----------- backend/src/shared/common_fn.py | 4 +- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index b795ae4ed..e493a01dc 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -8,13 +8,37 @@ from tqdm import tqdm -COMMUNITY_PROJECT_NAME = "communities" -NODE_PROJECTION = "__Entity__" +COMMUNITY_PROJECTION_NAME = "communities" +NODE_PROJECTION = "!Chunk&!Document&!__Community__" MAX_WORKERS = 10 + +CREATE_COMMUNITY_GRAPH_PROJECTION = """ +MATCH (source:{node_projection}) +OPTIONAL MATCH (source)-[]->(target:{node_projection}) +WITH source, target, count(*) AS weight +WITH gds.graph.project( + {project_name}, + source, + target, + {{ + relationshipProperties: {{ weight: weight }} + }}, + {{ + undirectedRelationshipTypes: ['*'] + }} +) AS g +RETURN + g.graphName AS graph_name, + g.nodeCount AS nodes, + g.relationshipCount AS rels +""" + + CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" CREATE_COMMUNITY_LEVELS = """ -MATCH (e:`__Entity__`) +MATCH (e:!Chunk&!Document&!__Community__) +WHERE e.communities is NOT NULL UNWIND range(0, size(e.communities) - 1 , 1) AS index CALL { WITH e, index @@ -39,7 +63,7 @@ RETURN count(*) """ CREATE_COMMUNITY_RANKS = """ -MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:__Entity__)<-[:MENTIONS]-(d:Document) +MATCH 
(c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[:MENTIONS]-(d:Document) WITH c, count(distinct d) AS rank SET c.community_rank = rank; """ @@ -50,8 +74,7 @@ SET n.weight = chunkCount""" GET_COMMUNITY_INFO = """ -MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:__Entity__) -WHERE c.level IN [0,1,4] +MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:!Chunk&!Document&!__Community__) WITH c, collect(e ) AS nodes WHERE size(nodes) > 1 CALL apoc.path.subgraphAll(nodes[0], { @@ -90,39 +113,43 @@ def get_gds_driver(uri, username, password, database): logging.error(f"Failed to create GDS driver: {e}") raise -def create_community_graph_project(gds, project_name=COMMUNITY_PROJECT_NAME, node_projection=NODE_PROJECTION): +def create_community_graph_projection(gds, project_name=COMMUNITY_PROJECTION_NAME, node_projection=NODE_PROJECTION): try: existing_projects = gds.graph.list() project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any() if project_exists: - logging.info(f"Project '{project_name}' already exists. Dropping it.") + logging.info(f"Projection '{project_name}' already exists. 
Dropping it.") gds.graph.drop(project_name) logging.info(f"Creating new graph project '{project_name}'.") - graph_project, result = gds.graph.project( - project_name, - node_projection, - { - "_ALL_": { - "type": "*", - "orientation": "UNDIRECTED", - "properties": { - "weight": { - "property": "*", - "aggregation": "COUNT" - } - } - } - } - ) - logging.info(f"Graph project '{project_name}' created successfully.") - return graph_project, result + # graph_project, result = gds.graph.project( + # project_name, + # node_projection, + # { + # "_ALL_": { + # "type": "*", + # "orientation": "UNDIRECTED", + # "properties": { + # "weight": { + # "property": "*", + # "aggregation": "COUNT" + # } + # } + # } + # } + # ) + projection_query = CREATE_COMMUNITY_GRAPH_PROJECTION.format(node_projection=node_projection,project_name=project_name) + graph_projection_result = gds.run_cypher(projection_query) + projection_result = graph_projection_result.to_dict(orient="records")[0] + logging.info(f"Graph projection '{projection_result['graph_name']}' created successfully with {projection_result['nodes']} nodes and {projection_result['rels']} relationships.") + graph_project = gds.graph.get(projection_result['graph_name']) + return graph_project except Exception as e: logging.error(f"Failed to create community graph project: {e}") raise -def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECT_NAME): +def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME): try: logging.info(f"Writing communities to the graph project '{project_name}'.") gds.leiden.write( @@ -231,10 +258,10 @@ def create_community_properties(graph, model): logging.error(f"Failed to create community properties: {e}") raise -def create_communities(uri, username, password, database,graph,model): +def create_communities(uri, username, password, database,model): try: gds = get_gds_driver(uri, username, password, database) - graph_project, result = create_community_graph_project(gds) + 
graph_project = create_community_graph_projection(gds) write_communities_sucess = write_communities(gds, graph_project) if write_communities_sucess: logging.info("Applying community constraint to the graph.") diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 6d24912c7..2037682e7 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str): return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) - # graph.add_graph_documents(graph_document_list) + # graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): for graph_document in graph_document_list: From 450ba6fd33ccf4d05a895a97a9df4228dde2816d Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 28 Aug 2024 17:56:57 +0000 Subject: [PATCH 029/292] removed graph_object --- backend/src/communities.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index e493a01dc..cfbb1cc21 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -99,7 +99,6 @@ Summary:""" - def get_gds_driver(uri, username, password, database): try: gds = GraphDataScience( @@ -216,9 +215,9 @@ def process_community(community, community_chain): logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") raise -def create_community_summaries(graph, model): +def create_community_summaries(gds, model): try: - community_info_list = graph.query(GET_COMMUNITY_INFO) + community_info_list = 
gds.run_cypher(GET_COMMUNITY_INFO) community_chain = get_community_chain(model) summaries = [] @@ -231,28 +230,28 @@ def create_community_summaries(graph, model): except Exception as e: logging.error(f"Failed to retrieve result for a community: {e}") - graph.query(STORE_COMMUNITY_SUMMARIES, params={"data": summaries}) + gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": summaries}) except Exception as e: logging.error(f"Failed to create community summaries: {e}") raise -def create_community_properties(graph, model): +def create_community_properties(gds, model): try: # Create community levels - graph.query(CREATE_COMMUNITY_LEVELS) + gds.run_cypher(CREATE_COMMUNITY_LEVELS) logging.info("Successfully created community levels.") # Create community ranks - graph.query(CREATE_COMMUNITY_RANKS) + gds.run_cypher(CREATE_COMMUNITY_RANKS) logging.info("Successfully created community ranks.") # Create community weights - graph.query(CREATE_COMMUNITY_WEIGHTS) + gds.run_cypher(CREATE_COMMUNITY_WEIGHTS) logging.info("Successfully created community weights.") # Create community summaries - create_community_summaries(graph, model) + create_community_summaries(gds, model) logging.info("Successfully created community summaries.") except Exception as e: logging.error(f"Failed to create community properties: {e}") @@ -265,8 +264,8 @@ def create_communities(uri, username, password, database,model): write_communities_sucess = write_communities(gds, graph_project) if write_communities_sucess: logging.info("Applying community constraint to the graph.") - graph.query(CREATE_COMMUNITY_CONSTRAINT) - create_community_properties(graph,model) + gds.run_cypher(CREATE_COMMUNITY_CONSTRAINT) + create_community_properties(gds,model) logging.info("Communities creation process completed successfully.") else: logging.warning("Failed to write communities. 
Constraint was not applied.") From 266c8121883b29445ba0020f72038d1a99d0d30c Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 28 Aug 2024 18:02:49 +0000 Subject: [PATCH 030/292] removed graph object in the function --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 717221520..7b06def44 100644 --- a/backend/score.py +++ b/backend/score.py @@ -244,7 +244,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database if "create_communities" in tasks: model = "openai-gpt-4o" - await asyncio.to_thread(create_communities, uri, userName, password, database,graph,model) + await asyncio.to_thread(create_communities, uri, userName, password, database,model) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(josn_obj) logging.info(f'created communities') From cac1963d473ca9aa7c758336f1baefc465a4bd6e Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 29 Aug 2024 07:35:41 +0000 Subject: [PATCH 031/292] resetting the alert message on success scenario --- .../src/components/Popups/ConnectionModal/ConnectionModal.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 63e2d7883..a7b3fcb1f 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -266,6 +266,9 @@ export default function ConnectionModal({ } } setTimeout(() => { + if(connectionMessage?.type!="danger"){ + setMessage({ type: 'unknown', content: '' }) + } setPassword(''); }, 3000); }; From 43ec5691ff1729fc8936e5345bcc78ab2fadb8e0 Mon Sep 17 00:00:00 2001 
From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:03:06 +0000 Subject: [PATCH 032/292] Modified queries --- backend/src/communities.py | 39 ++++++++++++++++------------------ backend/src/post_processing.py | 4 ++-- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index cfbb1cc21..dbcb73ebc 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -16,25 +16,20 @@ CREATE_COMMUNITY_GRAPH_PROJECTION = """ MATCH (source:{node_projection}) OPTIONAL MATCH (source)-[]->(target:{node_projection}) -WITH source, target, count(*) AS weight +WITH source, target, count(*) as weight WITH gds.graph.project( - {project_name}, - source, - target, - {{ - relationshipProperties: {{ weight: weight }} - }}, - {{ - undirectedRelationshipTypes: ['*'] - }} -) AS g + '{project_name}', + source, + target, + {{ + relationshipProperties: {{ weight: weight }} + }}, + {{undirectedRelationshipTypes: ['*']}} + ) AS g RETURN - g.graphName AS graph_name, - g.nodeCount AS nodes, - g.relationshipCount AS rels + g.graphName AS graph_name, g.nodeCount AS nodes, g.relationshipCount AS rels """ - CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" CREATE_COMMUNITY_LEVELS = """ MATCH (e:!Chunk&!Document&!__Community__) @@ -63,7 +58,7 @@ RETURN count(*) """ CREATE_COMMUNITY_RANKS = """ -MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[:MENTIONS]-(d:Document) +MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[HAS_ENTITY]-(:Chunk)<-[]-(d:Document) WITH c, count(distinct d) AS rank SET c.community_rank = rank; """ @@ -74,8 +69,8 @@ SET n.weight = chunkCount""" GET_COMMUNITY_INFO = """ -MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:!Chunk&!Document&!__Community__) -WITH c, collect(e ) AS nodes +MATCH (c:`__Community__`)<-[:IN_COMMUNITY]-(e) +WITH c, 
collect(e) AS nodes WHERE size(nodes) > 1 CALL apoc.path.subgraphAll(nodes[0], { whitelistNodes:nodes @@ -83,7 +78,7 @@ YIELD relationships RETURN c.id AS communityId, [n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes, - [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id, description: r.description}] AS rels + [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id}] AS rels """ STORE_COMMUNITY_SUMMARIES = """ @@ -200,7 +195,6 @@ def prepare_string(community_data): relationship_type = rel['type'] relationship_description = f", description: {rel['description']}" if 'description' in rel and rel['description'] else "" relationships_description += f"({start_node})-[:{relationship_type}]->({end_node}){relationship_description}\n" - return nodes_description + "\n" + relationships_description except Exception as e: logging.error(f"Failed to prepare string from community data: {e}") @@ -221,8 +215,11 @@ def create_community_summaries(gds, model): community_chain = get_community_chain(model) summaries = [] + futures = [] with ThreadPoolExecutor() as executor: - futures = {executor.submit(process_community, community, community_chain): community for community in community_info_list} + for _,community in community_info_list.iterrows(): + future = executor.submit(process_community, community, community_chain) + futures.append(future) for future in as_completed(futures): try: diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index fa582e107..7d038c61b 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -8,7 +8,7 @@ DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;" LABELS_QUERY = "CALL db.labels()" FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];" -FILTER_LABELS = ["Chunk","Document"] +FILTER_LABELS = ["Chunk","Document","__Community__"] 
HYBRID_SEARCH_INDEX_DROP_QUERY = "DROP INDEX keyword IF EXISTS;" @@ -80,7 +80,7 @@ def create_entity_embedding(graph:Neo4jGraph): def fetch_entities_for_embedding(graph): query = """ MATCH (e) - WHERE NOT (e:Chunk OR e:Document) AND e.embedding IS NULL AND e.id IS NOT NULL + WHERE NOT (e:Chunk OR e:Document OR e:`__Community__`) AND e.embedding IS NULL AND e.id IS NOT NULL RETURN elementId(e) AS elementId, e.id + " " + coalesce(e.description, "") AS text """ result = graph.query(query) From d010e419292bb94704bb5e2e6b464cc2d2cf40f1 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:00:27 +0000 Subject: [PATCH 033/292] populate graph schema --- backend/test_integrationqa.py | 48 ++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 4d0a5e1ea..821cc6b5c 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -141,13 +141,12 @@ def test_chatbot_qna(model_name, mode='vector'): print("Failed ", e) return QA_n_RAG -#Get disconnected_nodes list +#Get Test disconnected_nodes list def disconected_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() print(nodes_list[0]["e"]["elementId"]) - status = "False" if total_nodes['total']>0: @@ -157,15 +156,19 @@ def disconected_nodes(): return nodes_list[0]["e"]["elementId"], status -#Delete disconnected_nodes list +#Test Delete delete_disconnected_nodes list def delete_disconected_nodes(lst_element_id): print(f'disconnect elementid list {lst_element_id}') #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) print(f'delete 
disconnect api result {result}') + if not result: + return "True" + else: + return "False" -#Get Duplicate_nodes +#Test Get Duplicate_nodes def get_duplicate_nodes(): #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -174,11 +177,12 @@ def get_duplicate_nodes(): return "True" else: return "False" - -#Merge Duplicate_nodes -def test_merge_duplicate_nodes(): - graphDb_data_Access = graphDBdataAccess(graph) - result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list) + +#Test populate_graph_schema +def test_populate_graph_schema_from_text(model): + result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True) + print(result_schema) + return result_schema # def compare_graph_results(results): # """ @@ -202,28 +206,32 @@ def run_tests(): for model_name in models: try: - final_list.append(test_graph_from_file_local(model_name)) - final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_graph_website(model_name)) - final_list.append(test_graph_from_youtube_video(model_name)) - final_list.append(test_chatbot_qna(model_name)) - final_list.append(test_chatbot_qna(model_name, mode='vector')) - final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + final_list.append(test_graph_from_file_local(model_name)) + final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_populate_graph_schema_from_text(model_name)) + final_list.append(test_graph_website(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_chatbot_qna(model_name)) + final_list.append(test_chatbot_qna(model_name, mode='vector')) + 
final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) except Exception as e: error_list.append((model_name, str(e))) - #Compare and log diffrences in graph results - # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - + # #Compare and log diffrences in graph results + # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + # test_populate_graph_schema_from_text('openai-gpt-4o') dis_elementid, dis_status = disconected_nodes() lst_element_id = [dis_elementid] - delete_disconected_nodes(lst_element_id) + delt = delete_disconected_nodes(lst_element_id) dup = get_duplicate_nodes() + # schma = test_populate_graph_schema_from_text(model) # Save final results to CSV df = pd.DataFrame(final_list) print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') df['disconnected_nodes']=dis_status df['get_duplicate_nodes']=dup + df['delete_disconected_nodes']=delt + # df['test_populate_graph_schema_from_text'] = schma df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) # Save error details to CSV From b3a00aca7e9e3aa21edee9c78fec455edf012ef7 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:20:01 +0000 Subject: [PATCH 034/292] not clearing the password when there is error scenario --- .../ConnectionModal/ConnectionModal.tsx | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index a7b3fcb1f..e11509adf 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -197,6 +197,16 @@ export default function ConnectionModal({ if (response?.data?.status !== 'Success') { throw new Error(response.data.error); } else { + 
localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: connectionURI, + user: username, + password: password, + database: database, + userDbVectorIndex, + }) + ); setUserDbVectorIndex(response.data.data.db_vector_dimension); if ( (response.data.data.application_dimension === response.data.data.db_vector_dimension || @@ -228,6 +238,7 @@ export default function ConnectionModal({ /> ), }); + return; } else { setMessage({ type: 'danger', @@ -244,17 +255,8 @@ export default function ConnectionModal({ /> ), }); + return; } - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: connectionURI, - user: username, - password: password, - database: database, - userDbVectorIndex, - }) - ); } } catch (error) { setIsLoading(false); From 77b06db1458345ff57a7e2aba2fc2ad04b805420 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:49:00 +0000 Subject: [PATCH 035/292] fixed the vector index loading issue --- frontend/src/components/Content.tsx | 2 +- .../ConnectionModal/ConnectionModal.tsx | 20 +++++-------------- .../VectorIndexMisMatchAlert.tsx | 11 +++++++--- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 039c04106..2dd6e1154 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -859,4 +859,4 @@ const Content: React.FC = ({ ); }; -export default Content; \ No newline at end of file +export default Content; diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index e11509adf..23d9a1481 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -104,7 +104,6 @@ export default function ConnectionModal({ type: 'danger', content: ( 
recreateVectorIndex(chunksExistsWithDifferentEmbedding)} isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch} userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 'null').userDbVectorIndex} @@ -113,7 +112,7 @@ export default function ConnectionModal({ ), }); } - }, [isVectorIndexMatch, vectorIndexLoading, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]); + }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]); const parseAndSetURI = (uri: string, urlparams = false) => { const uriParts: string[] = uri.split('://'); @@ -224,15 +223,7 @@ export default function ConnectionModal({ type: 'danger', content: ( - recreateVectorIndex( - !( - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension - ) - ) - } + recreateVectorIndex={() => recreateVectorIndex(false)} isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0} chunksExists={true} /> @@ -244,7 +235,6 @@ export default function ConnectionModal({ type: 'danger', content: ( recreateVectorIndex(true)} isVectorIndexAlreadyExists={ response.data.data.db_vector_dimension != 0 && @@ -268,9 +258,9 @@ export default function ConnectionModal({ } } setTimeout(() => { - if(connectionMessage?.type!="danger"){ - setMessage({ type: 'unknown', content: '' }) - } + if (connectionMessage?.type != 'danger') { + setMessage({ type: 'unknown', content: '' }); + } setPassword(''); }, 3000); }; diff --git a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx index f9e85b63e..3c2965f44 100644 --- a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx +++ b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx @@ -2,21 +2,22 @@ import { Box, Flex } from '@neo4j-ndl/react'; import Markdown from 
'react-markdown'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; import { useCredentials } from '../../../context/UserCredentials'; +import { useState } from 'react'; export default function VectorIndexMisMatchAlert({ - vectorIndexLoading, recreateVectorIndex, isVectorIndexAlreadyExists, userVectorIndexDimension, chunksExists, }: { - vectorIndexLoading: boolean; recreateVectorIndex: () => Promise; isVectorIndexAlreadyExists: boolean; userVectorIndexDimension?: number; chunksExists: boolean; }) { const { userCredentials } = useCredentials(); + const [vectorIndexLoading, setVectorIndexLoading] = useState(false); + return ( @@ -42,7 +43,11 @@ To proceed, please choose one of the following options: label='creates the supported vector index' placement='top' loading={vectorIndexLoading} - onClick={() => recreateVectorIndex()} + onClick={async () => { + setVectorIndexLoading(true); + await recreateVectorIndex(); + setVectorIndexLoading(false); + }} className='!w-full' color='danger' disabled={userCredentials === null} From d1662909c6dac6194420b84c440986488648632c Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:07:51 +0000 Subject: [PATCH 036/292] fix: empty credentials payload for recreate vector index api --- .../ConnectionModal/ConnectionModal.tsx | 81 ++++++++++--------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 23d9a1481..ab9fe2399 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -60,41 +60,43 @@ export default function ConnectionModal({ }, [open]); const recreateVectorIndex = useCallback( - async (isNewVectorIndex: boolean) => { - try { - setVectorIndexLoading(true); - const response = await 
createVectorIndex(userCredentials as UserCredentials, isNewVectorIndex); - setVectorIndexLoading(false); - if (response.data.status === 'Failed') { - throw new Error(response.data.error); - } else { - setMessage({ - type: 'success', - content: 'Successfully created the vector index', - }); - setConnectionStatus(true); - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: userCredentials?.uri, - user: userCredentials?.userName, - password: userCredentials?.password, - database: userCredentials?.database, - userDbVectorIndex: 384, - }) - ); - } - } catch (error) { - setVectorIndexLoading(false); - if (error instanceof Error) { - console.log('Error in recreating the vector index', error.message); - setMessage({ type: 'danger', content: error.message }); + async (isNewVectorIndex: boolean, usercredential: UserCredentials) => { + if (usercredential != null && Object.values(usercredential).length) { + try { + setVectorIndexLoading(true); + const response = await createVectorIndex(usercredential as UserCredentials, isNewVectorIndex); + setVectorIndexLoading(false); + if (response.data.status === 'Failed') { + throw new Error(response.data.error); + } else { + setMessage({ + type: 'success', + content: 'Successfully created the vector index', + }); + setConnectionStatus(true); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: usercredential?.uri, + user: usercredential?.userName, + password: usercredential?.password, + database: usercredential?.database, + userDbVectorIndex: 384, + }) + ); + } + } catch (error) { + setVectorIndexLoading(false); + if (error instanceof Error) { + console.log('Error in recreating the vector index', error.message); + setMessage({ type: 'danger', content: error.message }); + } } + setTimeout(() => { + setMessage({ type: 'unknown', content: '' }); + setOpenConnection((prev) => ({ ...prev, openPopUp: false })); + }, 3000); } - setTimeout(() => { - setMessage({ type: 'unknown', content: '' }); - 
setOpenConnection((prev) => ({ ...prev, openPopUp: false })); - }, 3000); }, [userCredentials, userDbVectorIndex] ); @@ -104,7 +106,9 @@ export default function ConnectionModal({ type: 'danger', content: ( recreateVectorIndex(chunksExistsWithDifferentEmbedding)} + recreateVectorIndex={() => + recreateVectorIndex(chunksExistsWithDifferentEmbedding, userCredentials as UserCredentials) + } isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch} userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 'null').userDbVectorIndex} chunksExists={chunksExistsWithoutEmbedding} @@ -112,7 +116,7 @@ export default function ConnectionModal({ ), }); } - }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]); + }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding, userCredentials]); const parseAndSetURI = (uri: string, urlparams = false) => { const uriParts: string[] = uri.split('://'); @@ -188,7 +192,8 @@ export default function ConnectionModal({ const submitConnection = async () => { const connectionURI = `${protocol}://${URI}${URI.split(':')[1] ? 
'' : `:${port}`}`; - setUserCredentials({ uri: connectionURI, userName: username, password: password, database: database, port: port }); + const credential = { uri: connectionURI, userName: username, password: password, database: database, port: port }; + setUserCredentials(credential); setIsLoading(true); try { const response = await connectAPI(connectionURI, username, password, database); @@ -223,7 +228,7 @@ export default function ConnectionModal({ type: 'danger', content: ( recreateVectorIndex(false)} + recreateVectorIndex={() => recreateVectorIndex(false, credential)} isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0} chunksExists={true} /> @@ -235,7 +240,7 @@ export default function ConnectionModal({ type: 'danger', content: ( recreateVectorIndex(true)} + recreateVectorIndex={() => recreateVectorIndex(true, credential)} isVectorIndexAlreadyExists={ response.data.data.db_vector_dimension != 0 && response.data.data.db_vector_dimension != response.data.data.application_dimension From 088eda21f5f0ee553ddc955fad24f3c113adbfa9 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:51:26 +0530 Subject: [PATCH 037/292] chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes --- frontend/src/components/ChatBot/Chatbot.tsx | 15 +++++++++++---- frontend/src/components/Content.tsx | 3 +-- frontend/src/components/Graph/GraphViewModal.tsx | 4 ++++ frontend/src/components/Layout/DrawerChatbot.tsx | 3 ++- frontend/src/components/Layout/PageLayout.tsx | 9 +++++++-- frontend/src/components/Layout/SideNav.tsx | 3 +++ .../Deduplication/index.tsx | 4 ++-- frontend/src/context/UserCredentials.tsx | 7 ++++++- frontend/src/types.ts | 4 ++++ 9 files changed, 40 insertions(+), 12 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 
da5aaaf29..448af0e7d 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -22,7 +22,14 @@ import FallBackDialog from '../UI/FallBackDialog'; const InfoModal = lazy(() => import('./ChatInfoModal')); const Chatbot: FC = (props) => { - const { messages: listMessages, setMessages: setListMessages, isLoading, isFullScreen, clear } = props; + const { + messages: listMessages, + setMessages: setListMessages, + isLoading, + isFullScreen, + clear, + connectionStatus, + } = props; const [inputMessage, setInputMessage] = useState(''); const [loading, setLoading] = useState(isLoading); const { userCredentials } = useCredentials(); @@ -289,7 +296,7 @@ const Chatbot: FC = (props) => { shape='square' size='x-large' source={ChatBotAvatar} - status='online' + status={connectionStatus ? 'online' : 'offline'} type='image' /> ) : ( @@ -299,7 +306,7 @@ const Chatbot: FC = (props) => { name='KM' shape='square' size='x-large' - status='online' + status={connectionStatus ? 
'online' : 'offline'} type='image' /> )} @@ -415,7 +422,7 @@ const Chatbot: FC = (props) => { placement='top' text={`Query Documents in ${chatMode} mode`} type='submit' - disabled={loading} + disabled={loading || !connectionStatus} size='medium' > {buttonCaptions.ask} {selectedRows != undefined && selectedRows.length > 0 && `(${selectedRows.length})`} diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 2dd6e1154..44e7fd325 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -57,8 +57,7 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); - const [connectionStatus, setConnectionStatus] = useState(false); - const { setUserCredentials, userCredentials } = useCredentials(); + const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 39438b788..4125c0d12 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -69,6 +69,7 @@ const GraphViewModal: React.FunctionComponent = ({ const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); const debouncedQuery = useDebounce(searchQuery, 300); + const [disableRefresh, setDisableRefresh] = useState(false); // the checkbox selection const handleCheckboxChange = (graph: GraphType) => { @@ -165,6 +166,7 @@ const GraphViewModal: React.FunctionComponent = ({ setAllNodes(finalNodes); setAllRelationships(finalRels); setScheme(schemeVal); + setDisableRefresh(false); } else { setLoading(false); setStatus('danger'); @@ -292,6 +294,7 @@ const GraphViewModal: 
React.FunctionComponent = ({ // Refresh the graph with nodes and relations if file is processing const handleRefresh = () => { + setDisableRefresh(true); graphApi('refreshMode'); setGraphType(graphType); setNodes(nodes); @@ -455,6 +458,7 @@ const GraphViewModal: React.FunctionComponent = ({ text='Refresh graph' onClick={handleRefresh} placement='left' + disabled={disableRefresh} > diff --git a/frontend/src/components/Layout/DrawerChatbot.tsx b/frontend/src/components/Layout/DrawerChatbot.tsx index d08b2d8d1..3150e96a5 100644 --- a/frontend/src/components/Layout/DrawerChatbot.tsx +++ b/frontend/src/components/Layout/DrawerChatbot.tsx @@ -3,7 +3,7 @@ import Chatbot from '../ChatBot/Chatbot'; import { DrawerChatbotProps, Messages } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; -const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryData, messages }) => { +const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryData, messages, connectionStatus }) => { const { setMessages } = useMessageContext(); const getIsLoading = (messages: Messages[]) => { @@ -19,6 +19,7 @@ const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryD setMessages={setMessages} clear={clearHistoryData} isLoading={getIsLoading(messages)} + connectionStatus={connectionStatus} /> diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 8aca58109..8361ad3cd 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -33,7 +33,7 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const { userCredentials } = useCredentials(); + const { userCredentials, connectionStatus } = useCredentials(); const toggleLeftDrawer = () => { if (largedesktops) { 
setIsLeftExpanded(!isLeftExpanded); @@ -156,7 +156,12 @@ export default function PageLayoutNew({ closeSettingModal={closeSettingModal} /> {showDrawerChatbot && ( - + )} = ({ position, @@ -43,6 +44,7 @@ const SideNav: React.FC = ({ const [chatModeAnchor, setchatModeAnchor] = useState(null); const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); + const { connectionStatus } = useCredentials(); const date = new Date(); useEffect(() => { @@ -249,6 +251,7 @@ const SideNav: React.FC = ({ messages={messages ?? []} setMessages={setMessages} isLoading={getIsLoading(messages ?? [])} + connectionStatus={connectionStatus} /> , diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index f5a021e30..36fcff3d1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? 
{ ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index 20ed75ae5..af13ea371 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -1,4 +1,4 @@ -import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react'; +import { createContext, useState, useContext, FunctionComponent, ReactNode, useReducer } from 'react'; import { ContextProps, UserCredentials } from '../types'; type Props = { @@ -8,6 +8,8 @@ type Props = { export const UserConnection = createContext({ userCredentials: null, setUserCredentials: () => null, + connectionStatus: false, + setConnectionStatus: () => null, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -15,9 +17,12 @@ export const useCredentials = () => { }; const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); + const [connectionStatus, setConnectionStatus] = useReducer((state) => !state, false); const value = { userCredentials, setUserCredentials, + connectionStatus, + setConnectionStatus, }; return {props.children}; }; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 5bf076d10..2802a2d6b 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -247,6 +247,7 @@ export type ChatbotProps = { isLoading: boolean; clear?: boolean; isFullScreen?: boolean; + connectionStatus: boolean; }; export interface WikipediaModalTypes { hideModal: () => void; @@ -638,11 +639,14 @@ export interface DrawerChatbotProps { isExpanded: boolean; clearHistoryData: boolean; messages: Messages[]; + connectionStatus: boolean; } export interface ContextProps { userCredentials: UserCredentials | null; setUserCredentials: (UserCredentials: UserCredentials) => void; + connectionStatus: boolean; + setConnectionStatus: 
Dispatch>; } export interface MessageContextType { messages: Messages[] | []; From dbfe2a73752f951f2f7a5a570035d9b3e071cf3c Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 2 Sep 2024 17:19:37 +0000 Subject: [PATCH 038/292] added properties and modified to entity labels --- backend/src/communities.py | 243 ++++++++++++++++++++++++-------- backend/src/shared/common_fn.py | 4 +- 2 files changed, 190 insertions(+), 57 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index dbcb73ebc..1c670fff1 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -3,19 +3,19 @@ from src.llm import get_llm from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser -from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor, as_completed -from tqdm import tqdm +import os +from src.shared.common_fn import load_embedding_model COMMUNITY_PROJECTION_NAME = "communities" NODE_PROJECTION = "!Chunk&!Document&!__Community__" +NODE_PROJECTION_ENTITY = "__Entity__" MAX_WORKERS = 10 CREATE_COMMUNITY_GRAPH_PROJECTION = """ -MATCH (source:{node_projection}) -OPTIONAL MATCH (source)-[]->(target:{node_projection}) +MATCH (source:{node_projection})-[]->(target:{node_projection}) WITH source, target, count(*) as weight WITH gds.graph.project( '{project_name}', @@ -32,7 +32,7 @@ CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" CREATE_COMMUNITY_LEVELS = """ -MATCH (e:!Chunk&!Document&!__Community__) +MATCH (e:`__Entity__`) WHERE e.communities is NOT NULL UNWIND range(0, size(e.communities) - 1 , 1) AS index CALL { @@ -52,7 +52,7 @@ ON CREATE SET current.level = index MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])}) ON CREATE SET previous.level = index - 1 - MERGE (previous)-[:IN_COMMUNITY]->(current) + MERGE 
(previous)-[:PARENT_COMMUNITY]->(current) RETURN count(*) AS count_1 } RETURN count(*) @@ -63,13 +63,26 @@ SET c.community_rank = rank; """ +CREATE_PARENT_COMMUNITY_RANKS = """ +MATCH (c:__Community__)<-[:PARENT_COMMUNITY*]-(:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[HAS_ENTITY]-(:Chunk)<-[]-(d:Document) +WITH c, count(distinct d) AS rank +SET c.community_rank = rank; +""" + CREATE_COMMUNITY_WEIGHTS = """ MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c) WITH n, count(distinct c) AS chunkCount -SET n.weight = chunkCount""" +SET n.weight = chunkCount +""" +CREATE_PARENT_COMMUNITY_WEIGHTS = """ +MATCH (n:`__Community__`)<-[:PARENT_COMMUNITY*]-(:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c) +WITH n, count(distinct c) AS chunkCount +SET n.weight = chunkCount +""" GET_COMMUNITY_INFO = """ MATCH (c:`__Community__`)<-[:IN_COMMUNITY]-(e) +WHERE c.level = 0 WITH c, collect(e) AS nodes WHERE size(nodes) > 1 CALL apoc.path.subgraphAll(nodes[0], { @@ -81,17 +94,64 @@ [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id}] AS rels """ +GET_PARENT_COMMUNITY_INFO = """ +MATCH (p:`__Community__`)<-[:PARENT_COMMUNITY*]-(c:`__Community__`) +WHERE p.summary is null and c.summary is not null +RETURN p.id as communityId, collect(c.summary) as texts +""" + + STORE_COMMUNITY_SUMMARIES = """ UNWIND $data AS row MERGE (c:__Community__ {id:row.community}) SET c.summary = row.summary """ +COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No pre-amble." 
+ COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, generate a natural language summary of the provided information: {community_info} -Summary:""" +Summary:""" + +PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information" + +PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community, +generate a natural language summary of the information.Include all the necessary information as possible +{community_info} + +Summary:""" + + +GET_COMMUNITY_DETAILS = """ +MATCH (c:`__Community__`) +WHERE c.embedding IS NULL AND c.summary IS NOT NULL +RETURN c.id as communityId, c.summary as text +""" + +WRITE_COMMUNITY_EMBEDDINGS = """ +UNWIND $rows AS row +MATCH (c) WHERE c.id = row.communityId +CALL db.create.setNodeVectorProperty(c, "embedding", row.embedding) +""" + +DROP_COMMUNITIES = "MATCH (c:`__Community__`) DETACH DELETE c" +DROP_COMMUNITY_PROPERTY = "MATCH (e:`__Entity__`) REMOVE e.communities" + + +ENTITY_VECTOR_INDEX_NAME = "entity_vector" +ENTITY_VECTOR_EMBEDDING_DIMENSION = 384 + +CREATE_ENTITY_VECTOR_INDEX = """ +CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (e:__Entity__) ON e.embedding +OPTIONS {{ + indexConfig: {{ + `vector.dimensions`: {embedding_dimension}, + `vector.similarity_function`: 'cosine' + }} +}} +""" def get_gds_driver(uri, username, password, database): @@ -117,22 +177,6 @@ def create_community_graph_projection(gds, project_name=COMMUNITY_PROJECTION_NAM gds.graph.drop(project_name) logging.info(f"Creating new graph project '{project_name}'.") - # graph_project, result = gds.graph.project( - # project_name, - # node_projection, - # { - # "_ALL_": { - # "type": "*", - # "orientation": "UNDIRECTED", - # "properties": { - # "weight": { - # "property": "*", - # "aggregation": "COUNT" - # } - # } - # } - # } - # ) projection_query = 
CREATE_COMMUNITY_GRAPH_PROJECTION.format(node_projection=node_projection,project_name=project_name) graph_projection_result = gds.run_cypher(projection_query) projection_result = graph_projection_result.to_dict(orient="records")[0] @@ -159,14 +203,17 @@ def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME return False -def get_community_chain(model, community_template=COMMUNITY_TEMPLATE): +def get_community_chain(model, is_parent=False,community_template=COMMUNITY_TEMPLATE,system_template=COMMUNITY_SYSTEM_TEMPLATE): try: + if is_parent: + community_template=PARENT_COMMUNITY_TEMPLATE + system_template= PARENT_COMMUNITY_SYSTEM_TEMPLATE llm, model_name = get_llm(model) community_prompt = ChatPromptTemplate.from_messages( [ ( "system", - "Given input triples, generate the information summary. No pre-amble.", + system_template, ), ("human", community_template), ] @@ -200,68 +247,154 @@ def prepare_string(community_data): logging.error(f"Failed to prepare string from community data: {e}") raise -def process_community(community, community_chain): +def process_community_info(community, chain, is_parent=False): try: - formatted_community_info = prepare_string(community) - summary = community_chain.invoke({'community_info': formatted_community_info}) + if is_parent: + combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", []))) + else: + combined_text = prepare_string(community) + summary = chain.invoke({'community_info': combined_text}) return {"community": community['communityId'], "summary": summary} except Exception as e: logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") - raise + return None def create_community_summaries(gds, model): try: community_info_list = gds.run_cypher(GET_COMMUNITY_INFO) community_chain = get_community_chain(model) - + summaries = [] - futures = [] with ThreadPoolExecutor() as executor: - for _,community in 
community_info_list.iterrows(): - future = executor.submit(process_community, community, community_chain) - futures.append(future) - + futures = [executor.submit(process_community_info, community, community_chain) for _, community in community_info_list.items()] + for future in as_completed(futures): - try: - summaries.append(future.result()) - except Exception as e: - logging.error(f"Failed to retrieve result for a community: {e}") + result = future.result() + if result: + summaries.append(result) + else: + logging.error("community summaries could not be processed.") gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": summaries}) + + parent_community_info = gds.run_cypher(GET_PARENT_COMMUNITY_INFO) + parent_community_chain = get_community_chain(model, is_parent=True) + + parent_summaries = [] + with ThreadPoolExecutor() as executor: + futures = [executor.submit(process_community_info, community, parent_community_chain, is_parent=True) for _, community in parent_community_info.items()] + + for future in as_completed(futures): + result = future.result() + if result: + parent_summaries.append(result) + else: + logging.error("parent community summaries could not be processed.") + + gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": parent_summaries}) + except Exception as e: logging.error(f"Failed to create community summaries: {e}") raise +def create_community_embeddings(gds): + try: + embedding_model = os.getenv('EMBEDDING_MODEL') + embeddings, dimension = load_embedding_model(embedding_model) + logging.info(f"Embedding model '{embedding_model}' loaded successfully.") + + logging.info("Fetching community details.") + rows = gds.run_cypher(GET_COMMUNITY_DETAILS) + rows = rows[['communityId', 'text']].to_dict(orient='records') + logging.info(f"Fetched {len(rows)} communities.") + + batch_size = 100 + for i in range(0, len(rows), batch_size): + batch_rows = rows[i:i+batch_size] + for row in batch_rows: + try: + row['embedding'] = 
embeddings.embed_query(row['text']) + except Exception as e: + logging.error(f"Failed to embed text for community ID {row['communityId']}: {e}") + row['embedding'] = None + + try: + logging.info("Writing embeddings to the database.") + gds.run_cypher(WRITE_COMMUNITY_EMBEDDINGS, params={'rows': batch_rows}) + logging.info("Embeddings written successfully.") + except Exception as e: + logging.error(f"Failed to write embeddings to the database: {e}") + continue + return dimension + except Exception as e: + logging.error(f"An error occurred during the community embedding process: {e}") +def create_entity_vector_index(gds, embedding_dimension=ENTITY_VECTOR_EMBEDDING_DIMENSION): + query = CREATE_ENTITY_VECTOR_INDEX.format( + index_name=ENTITY_VECTOR_INDEX_NAME, + embedding_dimension=embedding_dimension + ) + try: + logging.info(f"Running Cypher query to create entity vector index: {query}") + gds.run_cypher(query) + logging.info("Entity vector index created successfully.") + except Exception as e: + logging.error(f"Error occurred while creating entity vector index: {e}", exc_info=True) + def create_community_properties(gds, model): + commands = [ + (CREATE_COMMUNITY_CONSTRAINT, "created community constraint to the graph."), + (CREATE_COMMUNITY_LEVELS, "Successfully created community levels."), + (CREATE_COMMUNITY_RANKS, "Successfully created community ranks."), + (CREATE_PARENT_COMMUNITY_RANKS, "Successfully created parent community ranks."), + (CREATE_COMMUNITY_WEIGHTS, "Successfully created community weights."), + (CREATE_PARENT_COMMUNITY_WEIGHTS, "Successfully created parent community weights."), + ] try: - # Create community levels - gds.run_cypher(CREATE_COMMUNITY_LEVELS) - logging.info("Successfully created community levels.") + for command, message in commands: + gds.run_cypher(command) + logging.info(message) - # Create community ranks - gds.run_cypher(CREATE_COMMUNITY_RANKS) - logging.info("Successfully created community ranks.") - - # Create community weights - 
gds.run_cypher(CREATE_COMMUNITY_WEIGHTS) - logging.info("Successfully created community weights.") - - # Create community summaries create_community_summaries(gds, model) logging.info("Successfully created community summaries.") + + embedding_dimension = create_community_embeddings(gds) + logging.info("Successfully created community embeddings.") + + create_entity_vector_index(gds,embedding_dimension=embedding_dimension) + logging.info("Successfully created Entity Vector Index.") + except Exception as e: - logging.error(f"Failed to create community properties: {e}") + logging.error(f"Error during community properties creation: {e}") raise + +def clear_communities(gds): + try: + logging.info("Starting to clear communities.") + + logging.info("Dropping communities...") + gds.run_cypher(DROP_COMMUNITIES) + logging.info(f"Communities dropped successfully") + + logging.info("Dropping community property from entities...") + gds.run_cypher(DROP_COMMUNITY_PROPERTY) + logging.info(f"Community property dropped successfully") + + except Exception as e: + logging.error(f"An error occurred while clearing communities: {e}") + raise + + def create_communities(uri, username, password, database,model): try: gds = get_gds_driver(uri, username, password, database) + clear_communities(gds) + graph_project = create_community_graph_projection(gds) write_communities_sucess = write_communities(gds, graph_project) if write_communities_sucess: - logging.info("Applying community constraint to the graph.") - gds.run_cypher(CREATE_COMMUNITY_CONSTRAINT) + logging.info("Starting Community properties creation process.") create_community_properties(gds,model) logging.info("Communities creation process completed successfully.") else: diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 2037682e7..6d24912c7 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str): return 
embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - # graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) - graph.add_graph_documents(graph_document_list) + graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + # graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): for graph_document in graph_document_list: From 3d25d78cf68a35bb142074585ff7c36587fb35a9 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 3 Sep 2024 17:51:29 +0530 Subject: [PATCH 039/292] Post processing call after all files completion (#716) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new 
retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video
timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on upload API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon according to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis of total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * create new endpoint populate_graph_schema and update the query for getting labels from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condition * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files
due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * format fixes * Close connection when graph object is not none * Call garbage collector to release the memory * Change error message * Added driver config as user_agent * Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473) * Web URLs are user input (#475) * web url support backend * added the tabs for input source * user agent added for Neo4jGraph connection * Tab view for sources * extract handling for web URLs * initial input handling * chunk creation before processing * code structure * format fixes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed the regex for web and cancel button naming * changed the schema dropdown type * readme updates * PROD version fix * changed the alert message for gcs * Delete unconnected entities from DB (#482) * 457 add schema before generate graph (#478) * schema setting from generate graph * changes * changes * badge changes * bug fix * Fulltext index and Update similarity graph (#479) * added full_text index * added one common function for post_processing * post processing api * added tasks param * modified logging * post processing changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Graph and vector search (#485) * Modified the retrieval query * added the chatmode toggle component * Modified to vector search * Moved the templates to constants * added the icons * added chat modes * code structure changes * Integrated the API changes * Modified retrieval queries, refactored code * API integration changes * added the score * order change * wording change * modified constants * added graph+vector * added the tooltips * Modified query *
removed the graph mode * tooltip camel Case * added the icon and extern link for web source in the info modal * added the youtube link in the source used tab * format fixes * added the hoverable link --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Update InfoModal.tsx * removed hover from chunks * remove… * lint fixes * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions 
--------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * 
added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed 
env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn 
format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: 
Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks 
create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added 
the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + 
fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI 
fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * 
metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * format fixes * Close connect when graph object is not none * Call garbage collector to release the memory * Change error message * Added driver config as user_agent * Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473) * Web URLs are user input (#475) * web url support backend * added the tabs for input source * user agent added for Neo4jGraph connection * Tab view for sources * extract handling for web URLs * initial input handling * chunk creation before processing * code structure * format fixes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed the regex for web and cancel button naming * changed the schema dropdown type * readme updates * PROD version fix * changed the alert message for gcs * Delete unconnected entities from DB (#482) * 457 add schema before generate graph (#478) * schema setting from generate graph * changes * changes * badge changes * bug fix * Fulltext index and Update similarity graph (#479) * added full_text index * added one common function for post_processing * post processing api * added tasks param * modified logging * post processing changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Graph and vector search (#485) * Modified the retrieval query * added the chatmode toggle component * Modified to vector search * Moved the templates to constants * added the icons * added chat modes * code structure changes * Integrated the API changes * Modified retrieval queries, refactored code * API integration changes * added the score * order change * wording change * modified constants * added graph+vector * added the tooltips * Modified query * 
removed the graph mode * tooltip camel Case * added the icon and external link for web source in the info modal * added the youtube link in the source used tab * format fixes * added the hoverable link --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Update InfoModal.tsx * removed hover from chunks * remove… * lint fixes * connection _check * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * 
code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for 
large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * 
Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * 
yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- 
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file 
selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README 
doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added 
the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * format fixes * Close connect when graph object is not none * Call garbage collector to release the menory * Change error message * Added driver config as user_agent * Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473) * Web URLs are user input (#475) * web url support backend * added the tabs for input source * user agent added for Neo4jGraph connection * Tab view for sources * extract handling for web ur's * initial input handling * chunk creation before processing * code structure * format fixes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed the regex for web and cancel button naming * changed the schema dropdown type * readme updates * PROD version fix * changed the alert message for gcs * Delete unconnected entities from DB (#482) * 457 add schema before generate graph (#478) * schema setting from generate graph * changes * changes * badge changes * bug fix * Fulltext index and Update similarity graph (#479) * added full_text index * added one common function for post_processing * post processing api * added tasks param * modifed logging * post processing changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Graph and vector search (#485) * Modified the retrival query * added the chatmode toggle component * Modified to vector search * Moved the templates to constants * added the icons * added chat modes * code structure changes * Intergrated the API changges * Modified retrieval queries,refactored code * API integration changes * added the score * order change * wording change * modified constants * added graph+vector * added the tooltips * Modified query * 
removed the graph mode * tooltip camel Case * added the icon and extern link for web source in the info modal * added the youtube link in the source used tab * format fixes * added the hoverable link --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Update InfoModal.tsx * removed hover from chunks * remove… * lint fixes * connection _check * Dev (#701) * lint fixes * connection _check --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Chatbot changes (#700) * added Hybrid search from graph * modified mode params * fixed issue delete entities return count * removed specified version due to dependency clashes between versions * updated script"integration test cases" * decreased the delay for pollintg API * Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes * node-highlight * Build fixex * slash docker change * deactivating previous node/relationshsips * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size * changed chat mode names (#702) * DEV to STAGING (#703) * Chatbot changes (#700) * added Hybrid search from graph * modified mode params * fixed issue delete entities return count * removed specified version due to dependency clashes between versions * updated script"integration test cases" * decreased the delay for pollintg API * Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes * node-highlight * Build fixex * slash docker change * deactivating previous node/relationshsips * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size * changed chat mode names (#702) --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: Pravesh1988 Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * env changes * used axios instance for network calls * disabled the toolip when dropdown is open state * format fixes + chat mode naming changes * mode added to info model for entities * Dev (#705) * connection _check * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube 
timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * 
added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type 
fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from 
GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * 
chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when 
we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived 
(#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs 
bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification.
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * create new endpoint populate_graph_schema and update the query for getting labels from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condition * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC,
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * format fixes * Close connect when graph object is not none * Call garbage collector to release the menory * Change error message * Added driver config as user_agent * Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473) * Web URLs are user input (#475) * web url support backend * added the tabs for input source * user agent added for Neo4jGraph connection * Tab view for sources * extract handling for web ur's * initial input handling * chunk creation before processing * code structure * format fixes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed the regex for web and cancel button naming * changed the schema dropdown type * readme updates * PROD version fix * changed the alert message for gcs * Delete unconnected entities from DB (#482) * 457 add schema before generate graph (#478) * schema setting from generate graph * changes * changes * badge changes * bug fix * Fulltext index and Update similarity graph (#479) * added full_text index * added one common function for post_processing * post processing api * added tasks param * modifed logging * post processing changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Graph and vector search (#485) * Modified the retrival query * added the chatmode toggle component * Modified to vector search * Moved the templates to constants * added the icons * added chat modes * code structure changes * Intergrated the API changges * Modified retrieval queries,refactored code * API integration changes * added the score * order change * wording change * modified constants * added graph+vector * added the tooltips * Modified query * 
removed the graph mode * tooltip camel Case * added the icon and extern link for web source in the info modal * added the youtube link in the source used tab * format fixes * added the hoverable link --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Update InfoModal.tsx … * Issue fixed, List out of index while getting status of dicuement node * default modes in staging * processing count updated on cancel * processing count update fix on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * resetting the alert message on success scenario * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * invoking the post processing after all processing completion --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa --- README.md | 2 +- backend/Performance_test.py | 1 + backend/requirements.txt | 24 +- backend/score.py | 51 +- backend/src/QA_integration_new.py | 28 +- backend/src/main.py | 161 
++++--- backend/src/shared/constants.py | 2 +- backend/test_integrationqa.py | 439 ++++++++---------- example.env | 37 +- frontend/Dockerfile | 3 +- frontend/example.env | 1 - frontend/src/API/Index.ts | 7 + frontend/src/App.css | 11 +- .../src/components/ChatBot/ChatInfoModal.tsx | 19 +- .../src/components/ChatBot/ChatModeToggle.tsx | 7 +- .../src/components/ChatBot/Info/InfoModal.tsx | 13 +- frontend/src/components/Content.tsx | 74 +-- frontend/src/components/Dropdown.tsx | 11 +- frontend/src/components/FileTable.tsx | 7 + .../src/components/Graph/GraphViewModal.tsx | 290 ++++++++++-- frontend/src/components/Graph/LegendsChip.tsx | 10 +- .../ConnectionModal/ConnectionModal.tsx | 116 ++--- .../VectorIndexMisMatchAlert.tsx | 11 +- .../Deduplication/index.tsx | 2 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- frontend/src/components/QuickStarter.tsx | 2 +- frontend/src/components/UI/Legend.tsx | 14 +- frontend/src/components/UI/ShowAll.tsx | 38 ++ frontend/src/context/UsersFiles.tsx | 2 +- frontend/src/hooks/useSse.tsx | 5 +- frontend/src/services/CancelAPI.ts | 5 +- frontend/src/services/ChunkEntitiesInfo.ts | 5 +- frontend/src/services/CommonAPI.ts | 5 +- frontend/src/services/ConnectAPI.ts | 5 +- frontend/src/services/DeleteFiles.ts | 5 +- frontend/src/services/DeleteOrphanNodes.ts | 5 +- frontend/src/services/GetDuplicateNodes.ts | 5 +- frontend/src/services/GetFiles.ts | 9 +- .../src/services/GetNodeLabelsRelTypes.ts | 5 +- frontend/src/services/GetOrphanNodes.ts | 5 +- frontend/src/services/GraphQuery.ts | 5 +- frontend/src/services/HealthStatus.ts | 7 +- .../src/services/MergeDuplicateEntities.ts | 5 +- frontend/src/services/PollingAPI.ts | 9 +- frontend/src/services/PostProcessing.ts | 5 +- frontend/src/services/QnaAPI.ts | 7 +- frontend/src/services/SchemaFromTextAPI.ts | 5 +- frontend/src/services/URLScan.ts | 5 +- frontend/src/services/vectorIndexCreation.ts | 5 +- frontend/src/types.ts | 39 +- frontend/src/utils/Constants.ts | 10 +- 
frontend/src/utils/Utils.ts | 10 +- frontend/yarn.lock | 2 +- 53 files changed, 920 insertions(+), 638 deletions(-) create mode 100644 frontend/src/API/Index.ts create mode 100644 frontend/src/components/UI/ShowAll.tsx diff --git a/README.md b/README.md index 800721ec3..e0b59346d 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ Allow unauthenticated request : Yes | VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A | VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A | VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app | -| VITE_TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing | +| VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | diff --git a/backend/Performance_test.py b/backend/Performance_test.py index fc0aee66f..712d3daf1 100644 --- a/backend/Performance_test.py +++ b/backend/Performance_test.py @@ -94,6 +94,7 @@ def performance_main(): for _ in range(CONCURRENT_REQUESTS): futures.append(executor.submit(post_request_chunk)) + # Chatbot request futures # Chatbot request futures # for message in CHATBOT_MESSAGES: # futures.append(executor.submit(chatbot_request, message)) diff --git a/backend/requirements.txt b/backend/requirements.txt index ab42a749d..46c57aea5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -69,18 +69,18 @@ jsonpath-python==1.0.6 jsonpointer==2.4 json-repair==0.25.2 kiwisolver==1.4.5 -langchain==0.2.8 -langchain-aws==0.1.9 -langchain-anthropic==0.1.19 -langchain-fireworks==0.1.4 -langchain-google-genai==1.0.7 -langchain-community==0.2.7 -langchain-core==0.2.19 -langchain-experimental==0.0.62 -langchain-google-vertexai==1.0.6 -langchain-groq==0.1.6 -langchain-openai==0.1.14 -langchain-text-splitters==0.2.2 +langchain +langchain-aws +langchain-anthropic +langchain-fireworks +langchain-google-genai +langchain-community +langchain-core +langchain-experimental +langchain-google-vertexai +langchain-groq +langchain-openai +langchain-text-splitters langdetect==1.0.9 langsmith==0.1.83 layoutparser==0.3.4 diff --git a/backend/score.py b/backend/score.py index 7b06def44..19f7ddb1a 100644 --- a/backend/score.py +++ b/backend/score.py @@ -106,8 +106,8 @@ async def create_source_knowledge_graph_url( return create_api_response('Failed',message='source_type is other than accepted source') message = f"Source Node created successfully for source type: {source_type} and source: {source}" - josn_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = 
{'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name) except Exception as e: error_message = str(e) @@ -209,9 +209,9 @@ async def extract_knowledge_graph_from_file( else: logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') delete_uploaded_local_file(merged_file_path,file_name) - josn_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) - logging.exception(f'File Failed in extraction: {josn_obj}') + json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) + logging.exception(f'File Failed in extraction: {json_obj}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) finally: gc.collect() @@ -226,8 +226,8 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) - josn_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + 
logger.log_struct(json_obj) return create_api_response("Success",data=result) except Exception as e: job_status = "Failed" @@ -251,19 +251,20 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) - josn_obj = {'api_name': 'post_processing/materialize_text_chunk_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) logging.info(f'Updated KNN Graph') + if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") - await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") + # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(josn_obj) logging.info(f'Full Text index created') if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) - josn_obj = {'api_name': 'post_processing/materialize_entity_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) logging.info(f'Entity Embeddings created') return create_api_response('Success', 
message='All tasks completed successfully') @@ -292,8 +293,8 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - josn_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -309,8 +310,8 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids= try: logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) - josn_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -337,8 +338,8 @@ async def graph_query( password=password, document_names=document_names ) - josn_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success', data=result) except Exception as e: job_status = "Failed" @@ -387,8 +388,8 @@ async def 
upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber try: graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) - josn_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: @@ -409,8 +410,8 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(get_labels_and_relationtypes, graph) logging.info(f'Schema result from DB: {result}') - josn_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success', data=result) except Exception as e: message="Unable to get the labels and relationtypes from neo4j database" @@ -478,8 +479,8 @@ async def delete_document_and_entities(uri=Form(), result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 message = f"Deleted {files_list_size} documents with entities from database" - josn_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = 
{'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj) return create_api_response('Success',message=message) except Exception as e: job_status = "Failed" @@ -635,4 +636,4 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() if __name__ == "__main__": - uvicorn.run(app) \ No newline at end of file + uvicorn.run(app) diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 1c0bc254c..eeac78c1e 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -41,26 +41,26 @@ def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - if mode == "hybrid": - # neo_db = Neo4jVector.from_existing_graph( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph, - # search_type="hybrid", - # node_label="Chunk", - # embedding_node_property="embedding", - # text_node_properties=["text"] - # # keyword_index_name=keyword_index - # ) - neo_db = Neo4jVector.from_existing_index( + if mode == "fulltext" or mode == "graph + vector + fulltext": + neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, retrieval_query=retrieval_query, graph=graph, search_type="hybrid", + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"], keyword_index_name=keyword_index ) + # neo_db = Neo4jVector.from_existing_index( + # embedding=EMBEDDING_FUNCTION, + # index_name=index_name, + # retrieval_query=retrieval_query, + # graph=graph, + # search_type="hybrid", + # keyword_index_name=keyword_index + # ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'") else: neo_db = 
Neo4jVector.from_existing_index( @@ -374,7 +374,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode): "user": "chatbot" } return result - elif mode == "vector" or mode == "hybrid": + elif mode == "vector" or mode == "fulltext": retrieval_query = VECTOR_SEARCH_QUERY else: retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) diff --git a/backend/src/main.py b/backend/src/main.py index f7dd190ef..a7d5058a0 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -264,6 +264,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) + print(result) logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] for i in range(0,len(pages)): @@ -277,91 +278,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages create_chunks_obj = CreateChunksofDocument(pages, graph) chunks = create_chunks_obj.split_file_into_chunks() chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) - if result[0]['Status'] != 'Processing': - obj_source_node = sourceNode() - status = "Processing" - obj_source_node.file_name = file_name - obj_source_node.status = status - obj_source_node.total_chunks = len(chunks) - obj_source_node.total_pages = len(pages) - obj_source_node.model = model - logging.info(file_name) - logging.info(obj_source_node) - graphDb_data_Access.update_source_node(obj_source_node) - - logging.info('Update the status as Processing') - update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED')) - # selected_chunks = [] - is_cancelled_status = False - job_status = "Completed" - node_count = 0 - rel_count = 0 - for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): - select_chunks_upto = i+update_graph_chunk_processed - logging.info(f'Selected Chunks upto: 
{select_chunks_upto}') - if len(chunkId_chunkDoc_list) <= select_chunks_upto: - select_chunks_upto = len(chunkId_chunkDoc_list) - selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] + + if len(result) > 0: + if result[0]['Status'] != 'Processing': + obj_source_node = sourceNode() + status = "Processing" + obj_source_node.file_name = file_name + obj_source_node.status = status + obj_source_node.total_chunks = len(chunks) + obj_source_node.total_pages = len(pages) + obj_source_node.model = model + logging.info(file_name) + logging.info(obj_source_node) + graphDb_data_Access.update_source_node(obj_source_node) + + logging.info('Update the status as Processing') + update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED')) + # selected_chunks = [] + is_cancelled_status = False + job_status = "Completed" + node_count = 0 + rel_count = 0 + for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): + select_chunks_upto = i+update_graph_chunk_processed + logging.info(f'Selected Chunks upto: {select_chunks_upto}') + if len(chunkId_chunkDoc_list) <= select_chunks_upto: + select_chunks_upto = len(chunkId_chunkDoc_list) + selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] + result = graphDb_data_Access.get_current_status_document_node(file_name) + is_cancelled_status = result[0]['is_cancelled'] + logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") + if bool(is_cancelled_status) == True: + job_status = "Cancelled" + logging.info('Exit from running loop of processing file') + exit + else: + node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) + end_time = datetime.now() + processed_time = end_time - start_time + + obj_source_node = sourceNode() + obj_source_node.file_name = file_name + obj_source_node.updated_at = end_time + obj_source_node.processing_time = processed_time + 
obj_source_node.node_count = node_count + obj_source_node.processed_chunk = select_chunks_upto + obj_source_node.relationship_count = rel_count + graphDb_data_Access.update_source_node(obj_source_node) + result = graphDb_data_Access.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] - logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") if bool(is_cancelled_status) == True: - job_status = "Cancelled" - logging.info('Exit from running loop of processing file') - exit - else: - node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) - end_time = datetime.now() - processed_time = end_time - start_time - - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.updated_at = end_time - obj_source_node.processing_time = processed_time - obj_source_node.node_count = node_count - obj_source_node.processed_chunk = select_chunks_upto - obj_source_node.relationship_count = rel_count - graphDb_data_Access.update_source_node(obj_source_node) - - result = graphDb_data_Access.get_current_status_document_node(file_name) - is_cancelled_status = result[0]['is_cancelled'] - if bool(is_cancelled_status) == True: - logging.info(f'Is_cancelled True at the end extraction') - job_status = 'Cancelled' - logging.info(f'Job Status at the end : {job_status}') - end_time = datetime.now() - processed_time = end_time - start_time - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.status = job_status - obj_source_node.processing_time = processed_time + logging.info(f'Is_cancelled True at the end extraction') + job_status = 'Cancelled' + logging.info(f'Job Status at the end : {job_status}') + end_time = datetime.now() + processed_time = end_time - start_time + obj_source_node = sourceNode() + obj_source_node.file_name = file_name + obj_source_node.status = 
job_status + obj_source_node.processing_time = processed_time - graphDb_data_Access.update_source_node(obj_source_node) - logging.info('Updated the nodeCount and relCount properties in Document node') - logging.info(f'file:{file_name} extraction has been completed') + graphDb_data_Access.update_source_node(obj_source_node) + logging.info('Updated the nodeCount and relCount properties in Document node') + logging.info(f'file:{file_name} extraction has been completed') - # merged_file_path have value only when file uploaded from local - - if is_uploaded_from_local: - gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - if gcs_file_cache == 'True': - folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name) - delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) - else: - delete_uploaded_local_file(merged_file_path, file_name) + # merged_file_path have value only when file uploaded from local - return { - "fileName": file_name, - "nodeCount": node_count, - "relationshipCount": rel_count, - "processingTime": round(processed_time.total_seconds(),2), - "status" : job_status, - "model" : model, - "success_count" : 1 - } + if is_uploaded_from_local: + gcs_file_cache = os.environ.get('GCS_FILE_CACHE') + if gcs_file_cache == 'True': + folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name) + delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) + else: + delete_uploaded_local_file(merged_file_path, file_name) + + return { + "fileName": file_name, + "nodeCount": node_count, + "relationshipCount": rel_count, + "processingTime": round(processed_time.total_seconds(),2), + "status" : job_status, + "model" : model, + "success_count" : 1 + } + else: + logging.info('File does not process because it\'s already in Processing status') else: - logging.info('File does not process because it\'s already in Processing status') + error_message = "Unable to get the status of docuemnt node." 
+ logging.error(error_message) + raise Exception(error_message) def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): #create vector index and update chunk node with embedding diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 903999a51..c5f8e98a4 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -276,4 +276,4 @@ RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ -YOUTUBE_CHUNK_SIZE_SECONDS = 60 \ No newline at end of file +YOUTUBE_CHUNK_SIZE_SECONDS = 60 diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index cd662cbdc..821cc6b5c 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -1,270 +1,243 @@ +import json +import os +import shutil +import logging +import pandas as pd +from datetime import datetime as dt +from dotenv import load_dotenv + from score import * from src.main import * -import logging from src.QA_integration_new import QA_RAG from langserve import add_routes -import asyncio -import os -from dotenv import load_dotenv -import pandas as pd -from datetime import datetime as dt -uri = '' -userName = '' -password = '' -# model = 'openai-gpt-3.5' -database = 'neo4j' +# Load environment variables if needed +load_dotenv() + +# Constants +URI = '' +USERNAME = '' +PASSWORD = '' +DATABASE = 'neo4j' CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") -graph = create_graph_database_connection(uri, userName, password, database) - - -def test_graph_from_file_local_file(model_name): - model = model_name - file_name = 'About Amazon.pdf' - # shutil.copyfile('data/Bank of America Q23.pdf', 'backend/src/merged_files/Bank of 
America Q23.pdf') - shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', - '/workspaces/llm-graph-builder/backend/merged_files/About Amazon.pdf') - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.file_type = 'pdf' - obj_source_node.file_size = '1087' - obj_source_node.file_source = 'local file' - obj_source_node.model = model - obj_source_node.created_at = datetime.now() - graphDb_data_Access = graphDBdataAccess(graph) - graphDb_data_Access.create_source_node(obj_source_node) - merged_file_path = os.path.join(MERGED_DIR, file_name) - print(merged_file_path) - - - local_file_result = extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, '', '') - # final_list.append(local_file_result) - print(local_file_result) - - logging.info("Info: ") - try: - assert local_file_result['status'] == 'Completed' and local_file_result['nodeCount'] > 0 and local_file_result[ - 'relationshipCount'] > 0 - return local_file_result - print("Success") - except AssertionError as e: - print("Fail: ", e) - return local_file_result - - -def test_graph_from_file_local_file_failed(model_name): - model = model_name - file_name = 'Not_exist.pdf' - try: - obj_source_node = sourceNode() - obj_source_node.file_name = file_name - obj_source_node.file_type = 'pdf' - obj_source_node.file_size = '0' - obj_source_node.file_source = 'local file' - obj_source_node.model = model - obj_source_node.created_at = datetime.now() - graphDb_data_Access = graphDBdataAccess(graph) - graphDb_data_Access.create_source_node(obj_source_node) - - local_file_result = extract_graph_from_file_local_file(graph, model, file_name, merged_file_path, '', '') - - print(local_file_result) - except AssertionError as e: - print('Failed due to file does not exist means not uploaded or accidentaly deleteled from server') - print("Failed: Error from extract function ", e) - -# Check for Wikipedia file to be test -def 
test_graph_from_Wikipedia(model_name): - model = model_name - wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' - source_type = 'Wikipedia' - file_name = "Ram_Mandir" - create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type) - wikiresult = extract_graph_from_file_Wikipedia(uri, userName, password, database, model, file_name, 1, 'en', '', '') - logging.info("Info: Wikipedia test done") - print(wikiresult) - + +# Initialize database connection +graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + +def create_source_node_local(graph, model, file_name): + """Creates a source node for a local file.""" + source_node = sourceNode() + source_node.file_name = file_name + source_node.file_type = 'pdf' + source_node.file_size = '1087' + source_node.file_source = 'local file' + source_node.model = model + source_node.created_at = dt.now() + graphDB_data_Access = graphDBdataAccess(graph) + graphDB_data_Access.create_source_node(source_node) + return source_node + +def test_graph_from_file_local(model_name): + """Test graph creation from a local file.""" + file_name = 'About Amazon.pdf' + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', + os.path.join(MERGED_DIR, file_name)) + + create_source_node_local(graph, model_name, file_name) + merged_file_path = os.path.join(MERGED_DIR, file_name) + + local_file_result = extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '' + ) + logging.info("Local file processing complete") + print(local_file_result) + try: - assert wikiresult['status'] == 'Completed' and wikiresult['nodeCount'] > 0 and wikiresult['relationshipCount'] > 0 - return wikiresult + assert local_file_result['status'] == 'Completed' + assert local_file_result['nodeCount'] > 0 + assert local_file_result['relationshipCount'] > 0 print("Success") except AssertionError as e: - print("Fail ", e) - return wikiresult - + print("Fail: 
", e) -def test_graph_from_Wikipedia_failed(): - wiki_query = 'Test QA 123456' - source_type = 'Wikipedia' - try: - logging.info("Created source node for wikipedia") - create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type) - except AssertionError as e: - print("Fail ", e) + return local_file_result -# Check for Youtube_video to be Success -def test_graph_from_youtube_video(model_name): - model = model_name - source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' - source_type = 'youtube' +def test_graph_from_wikipedia(model_name): + """Test graph creation from a Wikipedia page.""" + wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + source_type = 'Wikipedia' + file_name = "Ram_Mandir" + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - create_source_node_graph_url_youtube(graph, model, source_url, source_type) - youtuberesult = extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, '', '') + wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '') + logging.info("Wikipedia test done") + print(wiki_result) - logging.info("Info: Youtube Video test done") - print(youtuberesult) try: - assert youtuberesult['status'] == 'Completed' and youtuberesult['nodeCount'] > 1 and youtuberesult[ - 'relationshipCount'] > 1 - return youtuberesult + assert wiki_result['status'] == 'Completed' + assert wiki_result['nodeCount'] > 0 + assert wiki_result['relationshipCount'] > 0 print("Success") except AssertionError as e: - print("Failed ", e) - return youtuberesult - -# Check for Youtube_video to be Failed - -def test_graph_from_youtube_video_failed(): - url = 'https://www.youtube.com/watch?v=U9mJuUkhUzk' - source_type = 'youtube' - - create_source_node_graph_url_youtube(graph, model, url, source_type) - youtuberesult = extract_graph_from_file_youtube(graph, model, url, ',', ',') - # print(result) - print(youtuberesult) - 
try: - assert youtuberesult['status'] == 'Completed' - return youtuberesult - except AssertionError as e: - print("Failed ", e) - - -# Check for the GCS file to be uploaded, process and completed - -def test_graph_from_file_test_gcs(): - bucket_name = 'test' - folder_name = 'test' - source_type = 'gcs test bucket' - file_name = 'Neuralink brain chip patient playing chess.pdf' - create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type) - gcsresult = extract_graph_from_file_gcs(graph, model, bucket_name, folder_name, file_name, '', '') - - logging.info("Info") - print(gcsresult) - - try: - assert gcsresult['status'] == 'Completed' and gcsresult['nodeCount'] > 10 and gcsresult['relationshipCount'] > 5 - print("Success") - except AssertionError as e: - print("Failed ", e) - - -def test_graph_from_file_test_gcs_failed(): - bucket_name = 'llm_graph_test' - folder_name = 'test' - source_type = 'gcs bucket' - # file_name = 'Neuralink brain chip patient playing chess.pdf' - try: - create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type) - print("GCS: Create source node failed due to bucket not exist") - except AssertionError as e: - print("Failed ", e) - - -def test_graph_from_file_test_s3_failed(): - source_url = 's3://development-llm-test/' - try: - create_source_node_graph_url_s3(graph, model, source_url, 'test123', 'pwd123') - # assert result['status'] == 'Failed' - # print("S3 created source node failed die to wrong access key id and secret") - except AssertionError as e: - print("Failed ", e) - - -# Check the Functionality of Chatbot QnA for mode 'graph+vector' -def test_chatbot_QnA(model_name): - model = model_name - QA_n_RAG = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'graph+vector') + print("Fail: ", e) + + return wiki_result + +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + 
source_type = 'web-url' + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') + logging.info("WebUrl test done") + print(weburl_result) - print(QA_n_RAG) - print(len(QA_n_RAG['message'])) try: - assert len(QA_n_RAG['message']) > 20 - return QA_n_RAG + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 print("Success") except AssertionError as e: - print("Failed ", e) - return QA_n_RAG + print("Fail: ", e) + return weburl_result -# Check the Functionality of Chatbot QnA for mode 'vector' -def test_chatbot_QnA_vector(model_name): - model = model_name - QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector') +def test_graph_from_youtube_video(model_name): + """Test graph creation from a YouTube video.""" + source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' + source_type = 'youtube' + create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) + youtube_result = extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '' + ) + logging.info("YouTube Video test done") + print(youtube_result) - print(QA_n_RAG_vector) - print(len(QA_n_RAG_vector['message'])) try: - assert len(QA_n_RAG_vector['message']) > 20 - return QA_n_RAG_vector + assert youtube_result['status'] == 'Completed' + assert youtube_result['nodeCount'] > 1 + assert youtube_result['relationshipCount'] > 1 print("Success") except AssertionError as e: - print("Failed ", e) - return QA_n_RAG_vector - -# Check the Functionality of Chatbot QnA for mode 'hybrid' + print("Failed: ", e) -def test_chatbot_QnA_hybrid(model_name): - model = model_name - QA_n_RAG_hybrid = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'hybrid') + return youtube_result +def test_chatbot_qna(model_name, mode='vector'): 
+ """Test chatbot QnA functionality for different modes.""" + QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) + print(QA_n_RAG) + print(len(QA_n_RAG['message'])) - print(QA_n_RAG_hybrid) - print(len(QA_n_RAG_hybrid['message'])) try: - assert len(QA_n_RAG_hybrid['message']) > 20 - return QA_n_RAG_hybrid + assert len(QA_n_RAG['message']) > 20 + return QA_n_RAG print("Success") except AssertionError as e: print("Failed ", e) - return QA_n_RAG_hybrid - - + return QA_n_RAG + +#Get Test disconnected_nodes list +def disconected_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + print(nodes_list[0]["e"]["elementId"]) + status = "False" + + if total_nodes['total']>0: + status = "True" + else: + status = "False" + + return nodes_list[0]["e"]["elementId"], status + +#Test Delete delete_disconnected_nodes list +def delete_disconected_nodes(lst_element_id): + print(f'disconnect elementid list {lst_element_id}') + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + print(f'delete disconnect api result {result}') + if not result: + return "True" + else: + return "False" + +#Test Get Duplicate_nodes +def get_duplicate_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() + if total_nodes['total']>0: + return "True" + else: + return "False" + +#Test populate_graph_schema +def test_populate_graph_schema_from_text(model): + result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. 
Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True) + print(result_schema) + return result_schema + +# def compare_graph_results(results): +# """ +# Compare graph results across different models. +# Add custom logic here to compare graph data, nodes, and relationships. +# """ +# # Placeholder logic for comparison +# print("Comparing results...") +# for i in range(len(results) - 1): +# result_a = results[i] +# result_b = results[i + 1] +# if result_a == result_b: +# print(f"Result {i} is identical to result {i+1}") +# else: +# print(f"Result {i} differs from result {i+1}") + +def run_tests(): + final_list = [] + error_list = [] + models = ['openai-gpt-3.5', 'openai-gpt-4o'] + + for model_name in models: + try: + final_list.append(test_graph_from_file_local(model_name)) + final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_populate_graph_schema_from_text(model_name)) + final_list.append(test_graph_website(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_chatbot_qna(model_name)) + final_list.append(test_chatbot_qna(model_name, mode='vector')) + final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + except Exception as e: + error_list.append((model_name, str(e))) + # #Compare and log diffrences in graph results + # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + # test_populate_graph_schema_from_text('openai-gpt-4o') + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delt = delete_disconected_nodes(lst_element_id) + dup = get_duplicate_nodes() + # schma = test_populate_graph_schema_from_text(model) + # Save final results to CSV + df = pd.DataFrame(final_list) + print(df) + df['execution_date'] = dt.today().strftime('%Y-%m-%d') + 
df['disconnected_nodes']=dis_status + df['get_duplicate_nodes']=dup + df['delete_disconected_nodes']=delt + # df['test_populate_graph_schema_from_text'] = schma + df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + + # Save error details to CSV + df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) if __name__ == "__main__": - final_list = [] - for model_name in ['openai-gpt-3.5','azure_ai_gpt_35','azure_ai_gpt_4o','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet']: - - # local file - response = test_graph_from_file_local_file(model_name) - final_list.append(response) - - # # Wikipedia Test - response = test_graph_from_Wikipedia(model_name) - final_list.append(response) - - # # Youtube Test - response= test_graph_from_youtube_video(model_name) - final_list.append(response) - # # print(final_list) - - # # test_graph_from_file_test_gcs(model_name) # GCS Test - - # #chatbot 'graph+vector' - response = test_chatbot_QnA(model_name) - final_list.append(response) - - # #chatbot 'vector' - response = test_chatbot_QnA_vector(model_name) - final_list.append(response) - - # #chatbot 'hybrid' - response = test_chatbot_QnA_hybrid(model_name) - final_list.append(response) - - # test_graph_from_file_test_s3_failed() # S3 Failed Test Case - df = pd.DataFrame(final_list) - df['execution_date']= datetime.today().strftime('%Y-%m-%d') - df.to_csv(f"Integration_TestResult_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) \ No newline at end of file + run_tests() \ No newline at end of file diff --git a/example.env b/example.env index b443fd18d..23bcc6e06 100644 --- a/example.env +++ b/example.env @@ -1,27 +1,27 @@ # Mandatory -OPENAI_API_KEY = "" -DIFFBOT_API_KEY = "" +OPENAI_API_KEY="" +DIFFBOT_API_KEY="" # Optional Backend 
-EMBEDDING_MODEL = "all-MiniLM-L6-v2" -IS_EMBEDDING = "true" -KNN_MIN_SCORE = "0.94" +EMBEDDING_MODEL="all-MiniLM-L6-v2" +IS_EMBEDDING="true" +KNN_MIN_SCORE="0.94" # Enable Gemini (default is False) | Can be False or True -GEMINI_ENABLED = False +GEMINI_ENABLED=False # LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434" # Enable Google Cloud logs (default is False) | Can be False or True -GCP_LOG_METRICS_ENABLED = False -NUMBER_OF_CHUNKS_TO_COMBINE = 6 -UPDATE_GRAPH_CHUNKS_PROCESSED = 20 -NEO4J_URI = "neo4j://database:7687" -NEO4J_USERNAME = "neo4j" -NEO4J_PASSWORD = "password" -LANGCHAIN_API_KEY = "" -LANGCHAIN_PROJECT = "" -LANGCHAIN_TRACING_V2 = "true" -LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com" -GCS_FILE_CACHE = False +GCP_LOG_METRICS_ENABLED=False +NUMBER_OF_CHUNKS_TO_COMBINE=6 +UPDATE_GRAPH_CHUNKS_PROCESSED=20 +NEO4J_URI="neo4j://database:7687" +NEO4J_USERNAME="neo4j" +NEO4J_PASSWORD="password" +LANGCHAIN_API_KEY="" +LANGCHAIN_PROJECT="" +LANGCHAIN_TRACING_V2="true" +LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +GCS_FILE_CACHE=False ENTITY_EMBEDDING=True # Optional Frontend @@ -30,9 +30,8 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL= VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3" VITE_ENV="DEV" -VITE_TIME_PER_CHUNK=4 VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" -VITE_BATCH_SIZE=2 \ No newline at end of file +VITE_BATCH_SIZE=2 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 7a31d5bcf..c3a7c1c82 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -6,7 +6,6 @@ ARG VITE_REACT_APP_SOURCES="" ARG VITE_LLM_MODELS="" ARG VITE_GOOGLE_CLIENT_ID="" ARG VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" -ARG 
VITE_TIME_PER_CHUNK=4 ARG VITE_TIME_PER_PAGE=50 ARG VITE_LARGE_FILE_SIZE=5242880 ARG VITE_CHUNK_SIZE=5242880 @@ -23,8 +22,8 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \ VITE_BLOOM_URL=$VITE_BLOOM_URL \ - VITE_TIME_PER_CHUNK=$VITE_TIME_PER_CHUNK \ VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \ + VITE_TIME_PER_PAGE=$VITE_TIME_PER_PAGE \ VITE_ENV=$VITE_ENV \ VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \ VITE_CHAT_MODES=$VITE_CHAT_MODES \ diff --git a/frontend/example.env b/frontend/example.env index 05b8cdf60..63bd3e7c3 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -3,7 +3,6 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL= VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" VITE_ENV="DEV" -VITE_TIME_PER_CHUNK=4 VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 VITE_LARGE_FILE_SIZE=5242880 diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts new file mode 100644 index 000000000..f4ad15cbe --- /dev/null +++ b/frontend/src/API/Index.ts @@ -0,0 +1,7 @@ +import axios from 'axios'; +import { url } from '../utils/Utils'; + +const api = axios.create({ + baseURL: url(), +}); +export default api; diff --git a/frontend/src/App.css b/frontend/src/App.css index ff084ffae..fe285c972 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -233,10 +233,8 @@ letter-spacing: 0; line-height: 1.25rem; width: max-content; - height: 30px; text-overflow: ellipsis; white-space: nowrap; - overflow: hidden; } .ndl-widget-content>div { @@ -365,4 +363,13 @@ .widthunset{ width: initial !important; height: initial !important; +} + +.text-input-container { + transition: width 1.5s ease; + /* width: 100dvh; */ +} + +.text-input-container.search-initiated { + width: 60dvh; } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx 
b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 7bd837299..89c15ea72 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -18,13 +18,20 @@ import wikipedialogo from '../../assets/images/wikipedia.svg'; import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; import s3logo from '../../assets/images/s3logo.png'; -import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../types'; +import { + Chunk, + Entity, + ExtendedNode, + ExtendedRelationship, + GroupedEntity, + UserCredentials, + chatInfoMessage, +} from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; import HoverableLink from '../UI/HoverableLink'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import type { Relationship } from '@neo4j-nvl/base'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ReactMarkdown from 'react-markdown'; import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; @@ -51,7 +58,7 @@ const ChatInfoModal: React.FC = ({ const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); + const [relationships, setRelationships] = useState([]); const [chunks, setChunks] = useState([]); const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); @@ -168,7 +175,11 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode != 'graph' ? Sources used : <>} - {mode === 'graph+vector' || mode === 'graph' ? Top Entities used : <>} + {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? 
( + Top Entities used + ) : ( + <> + )} {mode === 'graph' && cypher_query?.trim().length ? ( Generated Cypher Query ) : ( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 4c0d54bc7..e82dfea4d 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -31,7 +31,12 @@ export default function ChatModeToggle({ () => chatModes?.map((m) => { return { - title: capitalize(m), + title: m.includes('+') + ? m + .split('+') + .map((s) => capitalize(s)) + .join('+') + : capitalize(m), onClick: () => { setchatMode(m); }, diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx index 0dd325731..cf1bbca47 100644 --- a/frontend/src/components/ChatBot/Info/InfoModal.tsx +++ b/frontend/src/components/ChatBot/Info/InfoModal.tsx @@ -6,13 +6,20 @@ import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; import youtubelogo from '../../../assets/images/youtube.png'; import gcslogo from '../../../assets/images/gcs.webp'; import s3logo from '../../../assets/images/s3logo.png'; -import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../../types'; +import { + Chunk, + Entity, + ExtendedNode, + ExtendedRelationship, + GroupedEntity, + UserCredentials, + chatInfoMessage, +} from '../../../types'; import { useEffect, useMemo, useState } from 'react'; import HoverableLink from '../../UI/HoverableLink'; import GraphViewButton from '../../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../../context/UserCredentials'; -import type { Relationship } from '@neo4j-nvl/base'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ReactMarkdown from 'react-markdown'; import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; @@ -23,7 +30,7 @@ const 
InfoModal: React.FC = ({ sources, model, total_tokens, re const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); + const [relationships, setRelationships] = useState([]); const [chunks, setChunks] = useState([]); const parseEntity = (entity: Entity) => { const { labels, properties } = entity; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index ec4fad9bb..4bde5a9b4 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -31,6 +31,8 @@ import FallBackDialog from './UI/FallBackDialog'; import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; +import axios from 'axios'; + const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); let afterFirstRender = false; @@ -180,6 +182,7 @@ const Content: React.FC = ({ }; const extractHandler = async (fileItem: CustomFile, uid: string) => { + queue.remove(fileItem.name as string); try { setFilesData((prevfiles) => prevfiles.map((curfile) => { @@ -252,28 +255,45 @@ const Content: React.FC = ({ }); } } catch (err: any) { - const error = JSON.parse(err.message); - if (Object.keys(error).includes('fileName')) { - const { message } = error; - const { fileName } = error; - const errorMessage = error.message; - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: message, - }); - setFilesData((prevfiles) => - prevfiles.map((curfile) => { - if (curfile.name == fileName) { - return { - ...curfile, - status: 'Failed', - errorMessage, - }; - } - return curfile; - }) - ); + if (err instanceof Error) { + try { + const error = JSON.parse(err.message); + if 
(Object.keys(error).includes('fileName')) { + setProcessedCount((prev) => { + if (prev == batchSize) { + return batchSize - 1; + } + return prev + 1; + }); + const { message, fileName } = error; + queue.remove(fileName); + const errorMessage = error.message; + setalertDetails({ + showAlert: true, + alertType: 'error', + alertMessage: message, + }); + setFilesData((prevfiles) => + prevfiles.map((curfile) => { + if (curfile.name == fileName) { + return { ...curfile, status: 'Failed', errorMessage }; + } + return curfile; + }) + ); + } else { + console.error('Unexpected error format:', error); + } + } catch (parseError) { + if (axios.isAxiosError(err)) { + const axiosErrorMessage = err.response?.data?.message || err.message; + console.error('Axios error occurred:', axiosErrorMessage); + } else { + console.error('An unexpected error occurred:', err.message); + } + } + } else { + console.error('An unknown error occurred:', err); } } }; @@ -302,7 +322,10 @@ const Content: React.FC = ({ return data; }; - const addFilesToQueue = (remainingFiles: CustomFile[]) => { + const addFilesToQueue = async (remainingFiles: CustomFile[]) => { + if (!remainingFiles.length) { + await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + } remainingFiles.forEach((f) => { setFilesData((prev) => prev.map((pf) => { @@ -379,13 +402,11 @@ const Content: React.FC = ({ } Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); - } else if (queueFiles && !queue.isEmpty()) { + } else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) { data = scheduleBatchWiseProcess(queue.items, true); Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); } else { addFilesToQueue(filesTobeProcessed as CustomFile[]); @@ -405,7 +426,6 @@ const Content: React.FC = ({ } 
Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); } else { const selectedNewFiles = childRef.current?.getSelectedRows().filter((f) => f.status === 'New'); diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index f046bb8ff..ba949aec0 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,6 +1,6 @@ import { Dropdown, Tip } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; -import { useMemo } from 'react'; +import { useMemo, useReducer } from 'react'; import { capitalize } from '../utils/Utils'; const DropdownComponent: React.FC = ({ @@ -13,6 +13,7 @@ const DropdownComponent: React.FC = ({ isDisabled, value, }) => { + const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false); const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); }; @@ -20,7 +21,7 @@ const DropdownComponent: React.FC = ({ return ( <>
- + = ({ menuPlacement: 'auto', isDisabled: isDisabled, value: value, + onMenuOpen: () => { + toggleDisableState(); + }, + onMenuClose: () => { + toggleDisableState(); + }, }} size='medium' fluid diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 70dff8f1a..23310eaf4 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -761,6 +761,13 @@ const FileTable = forwardRef((props, ref) => { return curfile; }) ); + setProcessedCount((prev) => { + if (prev == batchSize) { + return batchSize - 1; + } + return prev + 1; + }); + queue.remove(fileName); } else { let errorobj = { error: res.data.error, message: res.data.message, fileName }; throw new Error(JSON.stringify(errorobj)); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 9c0d5d190..39438b788 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,6 +1,23 @@ -import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, Typography } from '@neo4j-ndl/react'; +import { + Banner, + Dialog, + Flex, + IconButton, + IconButtonArray, + LoadingSpinner, + TextInput, + Typography, + useDebounce, +} from '@neo4j-ndl/react'; import { useCallback, useEffect, useRef, useState } from 'react'; -import { ExtendedNode, GraphType, GraphViewModalProps, Scheme, UserCredentials } from '../../types'; +import { + ExtendedNode, + ExtendedRelationship, + GraphType, + GraphViewModalProps, + Scheme, + UserCredentials, +} from '../../types'; import { InteractiveNvlWrapper } from '@neo4j-nvl/react'; import NVL from '@neo4j-nvl/base'; import type { Node, Relationship } from '@neo4j-nvl/base'; @@ -9,16 +26,26 @@ import { ArrowPathIconOutline, DragIcon, FitToScreenIcon, + MagnifyingGlassIconOutline, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; import IconButtonWithToolTip 
from '../UI/IconButtonToolTip'; -import { filterData, processGraphData } from '../../utils/Utils'; +import { filterData, processGraphData, sortAlphabetically } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; import { LegendsChip } from './LegendsChip'; import graphQueryAPI from '../../services/GraphQuery'; -import { graphLabels, intitalGraphType, mouseEventCallbacks, nvlOptions, queryMap } from '../../utils/Constants'; +import { + graphLabels, + intitalGraphType, + mouseEventCallbacks, + nvlOptions, + queryMap, + RESULT_STEP_SIZE, +} from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; +import { ShowAll } from '../UI/ShowAll'; + const GraphViewModal: React.FunctionComponent = ({ open, inspectedName, @@ -40,6 +67,10 @@ const GraphViewModal: React.FunctionComponent = ({ const { userCredentials } = useCredentials(); const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); + const [searchQuery, setSearchQuery] = useState(''); + const debouncedQuery = useDebounce(searchQuery, 300); + + // the checkbox selection const handleCheckboxChange = (graph: GraphType) => { const currentIndex = graphType.indexOf(graph); const newGraphSelected = [...graphType]; @@ -50,9 +81,18 @@ const GraphViewModal: React.FunctionComponent = ({ newGraphSelected.splice(currentIndex, 1); initGraph(newGraphSelected, allNodes, allRelationships, scheme); } + setSearchQuery(''); setGraphType(newGraphSelected); }; + const nodeCount = (nodes: ExtendedNode[], label: string): number => { + return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; + }; + + const relationshipCount = (relationships: ExtendedRelationship[], label: string): number => { + return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => i.id))].length; + }; + const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? 
queryMap.DocChunkEntities @@ -62,6 +102,7 @@ const GraphViewModal: React.FunctionComponent = ({ ? queryMap.Entities : ''; + // fit graph to original position const handleZoomToFit = () => { nvlRef.current?.fit( allNodes.map((node) => node.id), @@ -75,7 +116,9 @@ const GraphViewModal: React.FunctionComponent = ({ handleZoomToFit(); }, 10); return () => { - nvlRef.current?.destroy(); + if (nvlRef.current) { + nvlRef.current?.destroy(); + } setGraphType(intitalGraphType); clearTimeout(timeoutId); setScheme({}); @@ -83,6 +126,7 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships([]); setAllNodes([]); setAllRelationships([]); + setSearchQuery(''); }; }, []); @@ -151,6 +195,71 @@ const GraphViewModal: React.FunctionComponent = ({ } }, [open]); + // The search and update nodes + const handleSearch = useCallback( + (value: string) => { + const query = value.toLowerCase(); + const updatedNodes = nodes.map((node) => { + if (query === '') { + return { + ...node, + activated: false, + selected: false, + size: graphLabels.nodeSize, + }; + } + const { id, properties, caption } = node; + const propertiesMatch = properties?.id?.toLowerCase().includes(query); + const match = id.toLowerCase().includes(query) || propertiesMatch || caption?.toLowerCase().includes(query); + return { + ...node, + activated: match, + selected: match, + size: + match && viewPoint === graphLabels.showGraphView + ? 100 + : match && viewPoint !== graphLabels.showGraphView + ? 
50 + : graphLabels.nodeSize, + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + activated: false, + selected: false, + }; + }); + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }, + [nodes] + ); + + useEffect(() => { + handleSearch(debouncedQuery); + }, [debouncedQuery]); + + const initGraph = ( + graphType: GraphType[], + finalNodes: ExtendedNode[], + finalRels: Relationship[], + schemeVal: Scheme + ) => { + if (allNodes.length > 0 && allRelationships.length > 0) { + const { filteredNodes, filteredRelations, filteredScheme } = filterData( + graphType, + finalNodes ?? [], + finalRels ?? [], + schemeVal + ); + setNodes(filteredNodes); + setRelationships(filteredRelations); + setNewScheme(filteredScheme); + } + }; + // Unmounting the component if (!open) { return <>; @@ -201,10 +310,11 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships([]); setAllNodes([]); setAllRelationships([]); + setSearchQuery(''); }; // sort the legends in with Chunk and Document always the first two values - const legendCheck = Object.keys(newScheme).sort((a, b) => { + const nodeCheck = Object.keys(newScheme).sort((a, b) => { if (a === graphLabels.document || a === graphLabels.chunk) { return -1; } else if (b === graphLabels.document || b === graphLabels.chunk) { @@ -213,23 +323,75 @@ const GraphViewModal: React.FunctionComponent = ({ return a.localeCompare(b); }); - const initGraph = ( - graphType: GraphType[], - finalNodes: ExtendedNode[], - finalRels: Relationship[], - schemeVal: Scheme - ) => { - if (allNodes.length > 0 && allRelationships.length > 0) { - const { filteredNodes, filteredRelations, filteredScheme } = filterData( - graphType, - finalNodes ?? [], - finalRels ?? 
[], - schemeVal - ); - setNodes(filteredNodes); - setRelationships(filteredRelations); - setNewScheme(filteredScheme); + // get sorted relationships + const relationshipsSorted = relationships.sort(sortAlphabetically); + + // To get the relationship count + const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( + relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: Relationship) => { + const key = relType.caption || ''; + if (!acc[key]) { + acc[key] = { ...relType, count: 0 }; + } + + acc[key]!.count += relationshipCount(relationships as ExtendedRelationship[], key); + return acc; + }, {}) + ); + + // On Node Click, highlighting the nodes and deactivating any active relationships + const handleNodeClick = (nodeLabel: string) => { + const updatedNodes = nodes.map((node) => { + const isActive = node.labels.includes(nodeLabel); + return { + ...node, + activated: isActive, + selected: isActive, + size: + isActive && viewPoint === graphLabels.showGraphView + ? 100 + : isActive && viewPoint !== graphLabels.showGraphView + ? 
50 + : graphLabels.nodeSize, + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + activated: false, + selected: false, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); } + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }; + // On Relationship Legend Click, highlight the relationships and deactivating any active nodes + const handleRelationshipClick = (nodeLabel: string) => { + const updatedRelations = relationships.map((rel) => { + return { + ...rel, + activated: rel?.caption?.includes(nodeLabel), + selected: rel?.caption?.includes(nodeLabel), + }; + }); + // // deactivating any active nodes + const updatedNodes = nodes.map((node) => { + return { + ...node, + activated: false, + selected: false, + size: graphLabels.nodeSize, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setRelationships(updatedRelations); + setNodes(updatedNodes); }; return ( @@ -334,17 +496,79 @@ const GraphViewModal: React.FunctionComponent = ({ handleClasses={{ left: 'ml-1' }} >
- - {graphLabels.resultOverview} - - {graphLabels.totalNodes} ({nodes.length}) - - -
- {legendCheck.map((key, index) => ( - - ))} -
+ {nodeCheck.length > 0 && ( + <> + + {graphLabels.resultOverview} +
+ { + setSearchQuery(e.target.value); + }} + placeholder='Search On Node Properties' + fluid={true} + leftIcon={ + + + + } + /> +
+ + {graphLabels.totalNodes} ({nodes.length}) + +
+
+ + {nodeCheck.map((nodeLabel, index) => ( + handleNodeClick(nodeLabel)} + /> + ))} + +
+ + )} + {relationshipsSorted.length > 0 && ( + <> + + + {graphLabels.totalRelationships} ({relationships.length}) + + +
+ + {groupedAndSortedRelationships.map((relType, index) => ( + handleRelationshipClick(relType.caption || '')} + /> + ))} + +
+ + )}
diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 7e121dc67..2ab9c7b40 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,12 +1,6 @@ -import { useMemo } from 'react'; import { LegendChipProps } from '../../types'; import Legend from '../UI/Legend'; -export const LegendsChip: React.FunctionComponent = ({ scheme, title, nodes }) => { - const chunkcount = useMemo( - () => [...new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id))].length, - [nodes] - ); - - return ; +export const LegendsChip: React.FunctionComponent = ({ scheme, label, type, count, onClick }) => { + return ; }; diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 63e2d7883..ab9fe2399 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -60,41 +60,43 @@ export default function ConnectionModal({ }, [open]); const recreateVectorIndex = useCallback( - async (isNewVectorIndex: boolean) => { - try { - setVectorIndexLoading(true); - const response = await createVectorIndex(userCredentials as UserCredentials, isNewVectorIndex); - setVectorIndexLoading(false); - if (response.data.status === 'Failed') { - throw new Error(response.data.error); - } else { - setMessage({ - type: 'success', - content: 'Successfully created the vector index', - }); - setConnectionStatus(true); - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: userCredentials?.uri, - user: userCredentials?.userName, - password: userCredentials?.password, - database: userCredentials?.database, - userDbVectorIndex: 384, - }) - ); - } - } catch (error) { - setVectorIndexLoading(false); - if (error instanceof Error) { - console.log('Error in recreating the vector index', 
error.message); - setMessage({ type: 'danger', content: error.message }); + async (isNewVectorIndex: boolean, usercredential: UserCredentials) => { + if (usercredential != null && Object.values(usercredential).length) { + try { + setVectorIndexLoading(true); + const response = await createVectorIndex(usercredential as UserCredentials, isNewVectorIndex); + setVectorIndexLoading(false); + if (response.data.status === 'Failed') { + throw new Error(response.data.error); + } else { + setMessage({ + type: 'success', + content: 'Successfully created the vector index', + }); + setConnectionStatus(true); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: usercredential?.uri, + user: usercredential?.userName, + password: usercredential?.password, + database: usercredential?.database, + userDbVectorIndex: 384, + }) + ); + } + } catch (error) { + setVectorIndexLoading(false); + if (error instanceof Error) { + console.log('Error in recreating the vector index', error.message); + setMessage({ type: 'danger', content: error.message }); + } } + setTimeout(() => { + setMessage({ type: 'unknown', content: '' }); + setOpenConnection((prev) => ({ ...prev, openPopUp: false })); + }, 3000); } - setTimeout(() => { - setMessage({ type: 'unknown', content: '' }); - setOpenConnection((prev) => ({ ...prev, openPopUp: false })); - }, 3000); }, [userCredentials, userDbVectorIndex] ); @@ -104,8 +106,9 @@ export default function ConnectionModal({ type: 'danger', content: ( recreateVectorIndex(chunksExistsWithDifferentEmbedding)} + recreateVectorIndex={() => + recreateVectorIndex(chunksExistsWithDifferentEmbedding, userCredentials as UserCredentials) + } isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch} userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 
'null').userDbVectorIndex} chunksExists={chunksExistsWithoutEmbedding} @@ -113,7 +116,7 @@ export default function ConnectionModal({ ), }); } - }, [isVectorIndexMatch, vectorIndexLoading, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]); + }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding, userCredentials]); const parseAndSetURI = (uri: string, urlparams = false) => { const uriParts: string[] = uri.split('://'); @@ -189,7 +192,8 @@ export default function ConnectionModal({ const submitConnection = async () => { const connectionURI = `${protocol}://${URI}${URI.split(':')[1] ? '' : `:${port}`}`; - setUserCredentials({ uri: connectionURI, userName: username, password: password, database: database, port: port }); + const credential = { uri: connectionURI, userName: username, password: password, database: database, port: port }; + setUserCredentials(credential); setIsLoading(true); try { const response = await connectAPI(connectionURI, username, password, database); @@ -197,6 +201,16 @@ export default function ConnectionModal({ if (response?.data?.status !== 'Success') { throw new Error(response.data.error); } else { + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: connectionURI, + user: username, + password: password, + database: database, + userDbVectorIndex, + }) + ); setUserDbVectorIndex(response.data.data.db_vector_dimension); if ( (response.data.data.application_dimension === response.data.data.db_vector_dimension || @@ -214,27 +228,19 @@ export default function ConnectionModal({ type: 'danger', content: ( - recreateVectorIndex( - !( - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension - ) - ) - } + recreateVectorIndex={() => recreateVectorIndex(false, credential)} isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0} chunksExists={true} /> ), }); + return; } else { setMessage({ type: 
'danger', content: ( recreateVectorIndex(true)} + recreateVectorIndex={() => recreateVectorIndex(true, credential)} isVectorIndexAlreadyExists={ response.data.data.db_vector_dimension != 0 && response.data.data.db_vector_dimension != response.data.data.application_dimension @@ -244,17 +250,8 @@ export default function ConnectionModal({ /> ), }); + return; } - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: connectionURI, - user: username, - password: password, - database: database, - userDbVectorIndex, - }) - ); } } catch (error) { setIsLoading(false); @@ -266,6 +263,9 @@ export default function ConnectionModal({ } } setTimeout(() => { + if (connectionMessage?.type != 'danger') { + setMessage({ type: 'unknown', content: '' }); + } setPassword(''); }, 3000); }; diff --git a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx index f9e85b63e..3c2965f44 100644 --- a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx +++ b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx @@ -2,21 +2,22 @@ import { Box, Flex } from '@neo4j-ndl/react'; import Markdown from 'react-markdown'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; import { useCredentials } from '../../../context/UserCredentials'; +import { useState } from 'react'; export default function VectorIndexMisMatchAlert({ - vectorIndexLoading, recreateVectorIndex, isVectorIndexAlreadyExists, userVectorIndexDimension, chunksExists, }: { - vectorIndexLoading: boolean; recreateVectorIndex: () => Promise; isVectorIndexAlreadyExists: boolean; userVectorIndexDimension?: number; chunksExists: boolean; }) { const { userCredentials } = useCredentials(); + const [vectorIndexLoading, setVectorIndexLoading] = useState(false); + return ( @@ -42,7 +43,11 @@ To proceed, please choose one of the following options: label='creates the supported vector 
index' placement='top' loading={vectorIndexLoading} - onClick={() => recreateVectorIndex()} + onClick={async () => { + setVectorIndexLoading(true); + await recreateVectorIndex(); + setVectorIndexLoading(false); + }} className='!w-full' color='danger' disabled={userCredentials === null} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index bf8345550..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -160,7 +160,7 @@ export default function DeduplicationTab() { return ( {info.getValue().map((l, index) => ( - + ))} ); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 2dc8a13ff..373007e95 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -111,7 +111,7 @@ export default function DeletePopUpForOrphanNodes({ return ( {info.getValue().map((l, index) => ( - + ))} ); diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 29832fee1..db2cba7e7 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -42,4 +42,4 @@ const QuickStarter: React.FunctionComponent = () => { ); }; -export default QuickStarter; \ No newline at end of file +export default QuickStarter; diff --git a/frontend/src/components/UI/Legend.tsx b/frontend/src/components/UI/Legend.tsx index d798ff4f4..a3c806669 100644 --- a/frontend/src/components/UI/Legend.tsx +++ b/frontend/src/components/UI/Legend.tsx @@ -3,16 +3,20 @@ import { GraphLabel } from '@neo4j-ndl/react'; export 
default function Legend({ bgColor, title, - chunkCount, + count, + type, + onClick, }: { bgColor: string; title: string; - chunkCount?: number; + count?: number; + type: 'node' | 'relationship' | 'propertyKey'; + tabIndex?: number; + onClick?: (e: React.MouseEvent) => void; }) { return ( - - {title} - {chunkCount && `(${chunkCount})`} + + {title} {count !== undefined && `(${count})`} ); } diff --git a/frontend/src/components/UI/ShowAll.tsx b/frontend/src/components/UI/ShowAll.tsx new file mode 100644 index 000000000..726146723 --- /dev/null +++ b/frontend/src/components/UI/ShowAll.tsx @@ -0,0 +1,38 @@ +import { Button } from '@neo4j-ndl/react'; +import type { ReactNode } from 'react'; +import { useState } from 'react'; + +// import { ButtonGroup } from '../button-group/button-group'; + +type ShowAllProps = { + initiallyShown: number; + /* pass thunk to enable rendering only shown components */ + children: ((() => ReactNode) | ReactNode)[]; + ariaLabel?: string; +}; +const isThunkComponent = (t: (() => ReactNode) | ReactNode): t is () => ReactNode => typeof t === 'function'; + +export function ShowAll({ initiallyShown, children }: ShowAllProps) { + const [expanded, setExpanded] = useState(false); + const toggleExpanded = () => setExpanded((e) => !e); + const itemCount = children.length; + const controlsNeeded = itemCount > initiallyShown; + const shown = expanded ? itemCount : initiallyShown; + const leftToShow = itemCount - shown; + + if (itemCount === 0) { + return null; + } + + const currentChildren = children.slice(0, shown).map((c) => (isThunkComponent(c) ? c() : c)); + return ( + <> +
{currentChildren}
+ {controlsNeeded && ( + + )} + + ); +} diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 75f965d0a..fd3531403 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -58,7 +58,7 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); - const [chatMode, setchatMode] = useState('graph+vector'); + const [chatMode, setchatMode] = useState('graph+vector+fulltext'); const [isSchema, setIsSchema] = useState(false); const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({ triggeredFrom: '', diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index f8a07f61e..8b063751c 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx @@ -7,7 +7,7 @@ export default function useServerSideEvent( alertHandler: (inMinutes: boolean, minutes: number, filename: string) => void, errorHandler: (filename: string) => void ) { - const { setFilesData, setProcessedCount, queue } = useFileContext(); + const { setFilesData, setProcessedCount } = useFileContext(); function updateStatusForLargeFiles(eventSourceRes: eventResponsetypes) { const { fileName, @@ -45,7 +45,7 @@ export default function useServerSideEvent( }); }); } - } else if (status === 'Completed' || status === 'Cancelled') { + } else if (status === 'Completed') { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { if (curfile.name == fileName) { @@ -67,7 +67,6 @@ export default function useServerSideEvent( } return prev + 1; }); - queue.remove(fileName); } else if (eventSourceRes.status === 'Failed') { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { diff --git a/frontend/src/services/CancelAPI.ts b/frontend/src/services/CancelAPI.ts index a162bed83..de3ea9ba0 100644 --- 
a/frontend/src/services/CancelAPI.ts +++ b/frontend/src/services/CancelAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials, commonserverresponse } from '../types'; +import api from '../API/Index'; const cancelAPI = async (filenames: string[], source_types: string[]) => { try { @@ -14,7 +13,7 @@ const cancelAPI = async (filenames: string[], source_types: string[]) => { } formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); - const response = await axios.post(`${url()}/cancelled_job`, formData); + const response = await api.post(`/cancelled_job`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index 3b4323197..aa133c815 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ChatInfo_APIResponse, UserCredentials } from '../types'; +import api from '../API/Index'; const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: string) => { try { @@ -10,7 +9,7 @@ const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: str formData.append('password', userCredentials?.password ?? 
''); formData.append('chunk_ids', chunk_ids); - const response: ChatInfo_APIResponse = await axios.post(`${url()}/chunk_entities`, formData, { + const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/CommonAPI.ts b/frontend/src/services/CommonAPI.ts index 78d324248..bbae83032 100644 --- a/frontend/src/services/CommonAPI.ts +++ b/frontend/src/services/CommonAPI.ts @@ -1,5 +1,6 @@ -import axios, { AxiosResponse, Method } from 'axios'; +import { AxiosResponse, Method } from 'axios'; import { UserCredentials, FormDataParams } from '../types'; +import api from '../API/Index'; // API Call const apiCall = async ( @@ -16,7 +17,7 @@ const apiCall = async ( for (const key in additionalParams) { formData.append(key, additionalParams[key]); } - const response: AxiosResponse = await axios({ + const response: AxiosResponse = await api({ method: method, url: url, data: formData, diff --git a/frontend/src/services/ConnectAPI.ts b/frontend/src/services/ConnectAPI.ts index 73d997b1e..026c41c44 100644 --- a/frontend/src/services/ConnectAPI.ts +++ b/frontend/src/services/ConnectAPI.ts @@ -1,5 +1,4 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; +import api from '../API/Index'; const connectAPI = async (connectionURI: string, username: string, password: string, database: string) => { try { @@ -8,7 +7,7 @@ const connectAPI = async (connectionURI: string, username: string, password: str formData.append('database', database ?? ''); formData.append('userName', username ?? ''); formData.append('password', password ?? 
''); - const response = await axios.post(`${url()}/connect`, formData, { + const response = await api.post(`/connect`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/DeleteFiles.ts b/frontend/src/services/DeleteFiles.ts index 36ae28cbd..a86ef187e 100644 --- a/frontend/src/services/DeleteFiles.ts +++ b/frontend/src/services/DeleteFiles.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { CustomFile, UserCredentials } from '../types'; +import api from '../API/Index'; const deleteAPI = async (userCredentials: UserCredentials, selectedFiles: CustomFile[], deleteEntities: boolean) => { try { @@ -14,7 +13,7 @@ const deleteAPI = async (userCredentials: UserCredentials, selectedFiles: Custom formData.append('deleteEntities', JSON.stringify(deleteEntities)); formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); - const response = await axios.post(`${url()}/delete_document_and_entities`, formData); + const response = await api.post(`/delete_document_and_entities`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/DeleteOrphanNodes.ts b/frontend/src/services/DeleteOrphanNodes.ts index 135dfcdeb..2cebc572c 100644 --- a/frontend/src/services/DeleteOrphanNodes.ts +++ b/frontend/src/services/DeleteOrphanNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: string[]) => { try { @@ -10,7 +9,7 @@ const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('unconnected_entities_list', JSON.stringify(selectedNodes)); - const response = await axios.post(`${url()}/delete_unconnected_nodes`, formData); + const response = await api.post(`/delete_unconnected_nodes`, formData); return response; } catch (error) { console.log('Error Posting the Question:', error); diff --git a/frontend/src/services/GetDuplicateNodes.ts b/frontend/src/services/GetDuplicateNodes.ts index 3cb27a970..b7ea0c426 100644 --- a/frontend/src/services/GetDuplicateNodes.ts +++ b/frontend/src/services/GetDuplicateNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { duplicateNodesData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getDuplicateNodes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getDuplicateNodes = async (userCredentials: UserCredentials) => { formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); try { - const response = await axios.post(`${url()}/get_duplicate_nodes`, formData); + const response = await api.post(`/get_duplicate_nodes`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GetFiles.ts b/frontend/src/services/GetFiles.ts index fd8d60fba..056a9cc05 100644 --- a/frontend/src/services/GetFiles.ts +++ b/frontend/src/services/GetFiles.ts @@ -1,14 +1,11 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { SourceListServerData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getSourceNodes = async (userCredentials: UserCredentials) => { try { const encodedstr = btoa(userCredentials.password); - const response = await axios.get( - `${url()}/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${ - userCredentials.userName - }&password=${encodedstr}` + const response = await api.get( + `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${userCredentials.userName}&password=${encodedstr}` ); return response; } catch (error) { diff --git a/frontend/src/services/GetNodeLabelsRelTypes.ts b/frontend/src/services/GetNodeLabelsRelTypes.ts index acc05f267..8c7345c2a 100644 --- a/frontend/src/services/GetNodeLabelsRelTypes.ts +++ b/frontend/src/services/GetNodeLabelsRelTypes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ServerData, UserCredentials } from '../types'; +import api from '../API/Index'; export const getNodeLabelsAndRelTypes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getNodeLabelsAndRelTypes = async (userCredentials: UserCredentials) formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); try { - const response = await axios.post(`${url()}/schema`, formData); + const response = await api.post(`/schema`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GetOrphanNodes.ts b/frontend/src/services/GetOrphanNodes.ts index 70cfe8cda..3578214d1 100644 --- a/frontend/src/services/GetOrphanNodes.ts +++ b/frontend/src/services/GetOrphanNodes.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { OrphanNodeResponse, UserCredentials } from '../types'; +import api from '../API/Index'; export const getOrphanNodes = async (userCredentials: UserCredentials) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getOrphanNodes = async (userCredentials: UserCredentials) => { formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); try { - const response = await axios.post(`${url()}/get_unconnected_nodes_list`, formData); + const response = await api.post(`/get_unconnected_nodes_list`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/GraphQuery.ts b/frontend/src/services/GraphQuery.ts index 55d22a5ee..f792f4aec 100644 --- a/frontend/src/services/GraphQuery.ts +++ b/frontend/src/services/GraphQuery.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const graphQueryAPI = async ( userCredentials: UserCredentials, @@ -16,7 +15,7 @@ const graphQueryAPI = async ( formData.append('query_type', query_type ?? 
'entities'); formData.append('document_names', JSON.stringify(document_names)); - const response = await axios.post(`${url()}/graph_query`, formData, { + const response = await api.post(`/graph_query`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/HealthStatus.ts b/frontend/src/services/HealthStatus.ts index 69a77f0fe..a59badbe0 100644 --- a/frontend/src/services/HealthStatus.ts +++ b/frontend/src/services/HealthStatus.ts @@ -1,9 +1,8 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; +import api from '../API/Index'; const healthStatus = async () => { try { - const healthUrl = `${url()}/health`; - const response = await axios.get(healthUrl); + const healthUrl = `/health`; + const response = await api.get(healthUrl); return response; } catch (error) { console.log('API status error', error); diff --git a/frontend/src/services/MergeDuplicateEntities.ts b/frontend/src/services/MergeDuplicateEntities.ts index ee4e0fffb..1fdb9b387 100644 --- a/frontend/src/services/MergeDuplicateEntities.ts +++ b/frontend/src/services/MergeDuplicateEntities.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { commonserverresponse, selectedDuplicateNodes, UserCredentials } from '../types'; +import api from '../API/Index'; const mergeDuplicateNodes = async (userCredentials: UserCredentials, selectedNodes: selectedDuplicateNodes[]) => { try { @@ -10,7 +9,7 @@ const mergeDuplicateNodes = async (userCredentials: UserCredentials, selectedNod formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('duplicate_nodes_list', JSON.stringify(selectedNodes)); - const response = await axios.post(`${url()}/merge_duplicate_nodes`, formData); + const response = await api.post(`/merge_duplicate_nodes`, formData); return response; } catch (error) { console.log('Error Merging the duplicate nodes:', error); diff --git a/frontend/src/services/PollingAPI.ts b/frontend/src/services/PollingAPI.ts index 08e6a11bb..f14ed0580 100644 --- a/frontend/src/services/PollingAPI.ts +++ b/frontend/src/services/PollingAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { PollingAPI_Response, statusupdate } from '../types'; +import api from '../API/Index'; export default async function subscribe( fileName: string, @@ -15,12 +14,12 @@ export default async function subscribe( const MAX_POLLING_ATTEMPTS = 10; let pollingAttempts = 0; - let delay = 2000; + let delay = 1000; while (pollingAttempts < MAX_POLLING_ATTEMPTS) { let currentdelay = delay; - let response: PollingAPI_Response = await axios.get( - `${url()}/document_status/${fileName}?url=${uri}&userName=${username}&password=${encodedstr}&database=${database}` + let response: PollingAPI_Response = await api.get( + `/document_status/${fileName}?url=${uri}&userName=${username}&password=${encodedstr}&database=${database}` ); if (response.data?.file_name?.status === 'Processing') { diff --git a/frontend/src/services/PostProcessing.ts b/frontend/src/services/PostProcessing.ts index 9f45cc6bd..98c94c238 100644 --- a/frontend/src/services/PostProcessing.ts +++ b/frontend/src/services/PostProcessing.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; const postProcessing = async (userCredentials: UserCredentials, taskParam: string[]) => { try { @@ -10,7 +9,7 @@ const postProcessing = async (userCredentials: UserCredentials, taskParam: strin formData.append('userName', 
userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); formData.append('tasks', JSON.stringify(taskParam)); - const response = await axios.post(`${url()}/post_processing`, formData, { + const response = await api.post(`/post_processing`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/QnaAPI.ts b/frontend/src/services/QnaAPI.ts index 931618839..78fa240ba 100644 --- a/frontend/src/services/QnaAPI.ts +++ b/frontend/src/services/QnaAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; +import api from '../API/Index'; export const chatBotAPI = async ( userCredentials: UserCredentials, @@ -22,7 +21,7 @@ export const chatBotAPI = async ( formData.append('mode', mode); formData.append('document_names', JSON.stringify(document_names)); const startTime = Date.now(); - const response = await axios.post(`${url()}/chat_bot`, formData, { + const response = await api.post(`/chat_bot`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, @@ -44,7 +43,7 @@ export const clearChatAPI = async (userCredentials: UserCredentials, session_id: formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('session_id', session_id); - const response = await axios.post(`${url()}/clear_chat_bot`, formData, { + const response = await api.post(`/clear_chat_bot`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/SchemaFromTextAPI.ts b/frontend/src/services/SchemaFromTextAPI.ts index 785fb0d68..3d1984ccf 100644 --- a/frontend/src/services/SchemaFromTextAPI.ts +++ b/frontend/src/services/SchemaFromTextAPI.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ScehmaFromText } from '../types'; +import api from '../API/Index'; export const getNodeLabelsAndRelTypesFromText = async (model: string, inputText: string, isSchemaText: boolean) => { const formData = new FormData(); @@ -9,7 +8,7 @@ export const getNodeLabelsAndRelTypesFromText = async (model: string, inputText: formData.append('is_schema_description_checked', JSON.stringify(isSchemaText)); try { - const response = await axios.post(`${url()}/populate_graph_schema`, formData); + const response = await api.post(`/populate_graph_schema`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/services/URLScan.ts b/frontend/src/services/URLScan.ts index 58ad37dfb..444022934 100644 --- a/frontend/src/services/URLScan.ts +++ b/frontend/src/services/URLScan.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { ScanProps, ServerResponse } from '../types'; +import api from '../API/Index'; const urlScanAPI = async (props: ScanProps) => { try { @@ -46,7 +45,7 @@ const urlScanAPI = async (props: ScanProps) => { formData.append('access_token', props.access_token); } - const response: ServerResponse = await axios.post(`${url()}/url/scan`, formData, { + const response: ServerResponse = await api.post(`/url/scan`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, diff --git a/frontend/src/services/vectorIndexCreation.ts 
b/frontend/src/services/vectorIndexCreation.ts index accf85659..e89767551 100644 --- a/frontend/src/services/vectorIndexCreation.ts +++ b/frontend/src/services/vectorIndexCreation.ts @@ -1,6 +1,5 @@ -import axios from 'axios'; -import { url } from '../utils/Utils'; import { commonserverresponse, UserCredentials } from '../types'; +import api from '../API/Index'; export const createVectorIndex = async (userCredentials: UserCredentials, isVectorIndexExists: boolean) => { const formData = new FormData(); @@ -10,7 +9,7 @@ export const createVectorIndex = async (userCredentials: UserCredentials, isVect formData.append('password', userCredentials?.password ?? ''); formData.append('isVectorIndexExist', JSON.stringify(isVectorIndexExists)); try { - const response = await axios.post(`${url()}/drop_create_vector_index`, formData); + const response = await api.post(`/drop_create_vector_index`, formData); return response; } catch (error) { console.log(error); diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 8e05f632b..5bf076d10 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -259,7 +259,7 @@ export interface GraphViewModalProps { setGraphViewOpen: Dispatch>; viewPoint: string; nodeValues?: ExtendedNode[]; - relationshipValues?: Relationship[]; + relationshipValues?: ExtendedRelationship[]; selectedRows?: CustomFile[] | undefined; } @@ -344,13 +344,13 @@ export type alertStateType = { export type Scheme = Record; export type LabelCount = Record; -interface NodeType extends Partial { - labels?: string[]; -} + export interface LegendChipProps { scheme: Scheme; - title: string; - nodes: NodeType[]; + label: string; + type: 'node' | 'relationship' | 'propertyKey'; + count: number; + onClick: (e: React.MouseEvent) => void; } export interface FileContextProviderProps { children: ReactNode; @@ -578,29 +578,6 @@ export type GraphStatsLabels = Record< } >; -type NodeStyling = { - backgroundColor: string; - borderColor: string; - textColor: string; - 
caption: string; - diameter: string; -}; - -type RelationStyling = { - fontSize: string; - lineColor: string; - textColorExternal: string; - textColorInternal: string; - caption: string; - padding: string; - width: string; -}; - -export type GraphStyling = { - node: Record>; - relationship: Record>; -}; - export interface ExtendedNode extends Node { labels: string[]; properties: { @@ -610,7 +587,7 @@ export interface ExtendedNode extends Node { } export interface ExtendedRelationship extends Relationship { - labels: string[]; + count: number; } export interface connectionState { openPopUp: boolean; @@ -655,7 +632,7 @@ export interface S3File { } export interface GraphViewButtonProps { nodeValues?: ExtendedNode[]; - relationshipValues?: Relationship[]; + relationshipValues?: ExtendedRelationship[]; } export interface DrawerChatbotProps { isExpanded: boolean; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index b4781b929..47f1e119f 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -51,15 +51,15 @@ export const llms = 'bedrock_claude_3_5_sonnet', ]; -export const defaultLLM = llms?.includes('openai-gpt-4o-mini') - ? 'openai-gpt-4o-mini' +export const defaultLLM = llms?.includes('openai-gpt-4o') + ? 'openai-gpt-4o' : llms?.includes('gemini-1.0-pro') ? 'gemini-1.0-pro' : 'diffbot'; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'hybrid', 'hybrid+graph']; + : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext']; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? 
parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; @@ -231,4 +231,8 @@ export const graphLabels = { totalNodes: 'Total Nodes', noEntities: 'No Entities Found', selectCheckbox: 'Select atleast one checkbox for graph view', + totalRelationships: 'Total Relationships', + nodeSize: 30, }; + +export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 9745993f6..0b8b05841 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -1,6 +1,6 @@ import { calcWordColor } from '@neo4j-devtools/word-color'; import type { Relationship } from '@neo4j-nvl/base'; -import { Entity, ExtendedNode, GraphType, Messages, Scheme } from '../types'; +import { Entity, ExtendedNode, ExtendedRelationship, GraphType, Messages, Scheme } from '../types'; // Get the Url export const url = () => { @@ -130,7 +130,7 @@ export function extractPdfFileName(url: string): string { return decodedFileName; } -export const processGraphData = (neoNodes: ExtendedNode[], neoRels: Relationship[]) => { +export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRelationship[]) => { const schemeVal: Scheme = {}; let iterator = 0; const labels: string[] = neoNodes.map((f: any) => f.labels); @@ -239,3 +239,9 @@ export const parseEntity = (entity: Entity) => { export const titleCheck = (title: string) => { return title === 'Chunk' || title === 'Document'; }; + +export const sortAlphabetically = (a: Relationship, b: Relationship) => { + const captionOne = a.caption?.toLowerCase() || ''; + const captionTwo = b.caption?.toLowerCase() || ''; + return captionOne.localeCompare(captionTwo); +}; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index c91c34832..3915845ef 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -6614,4 +6614,4 @@ yocto-queue@^0.1.0: zwitch@^2.0.0: version "2.0.4" resolved "https://registry.yarnpkg.com/zwitch/-/zwitch-2.0.4.tgz#c827d4b0acb76fc3e685a4c6ec2902d51070e9d7" - 
integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A== + integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A== \ No newline at end of file From 8baedb01613eaa492ad02d0789c0134de66873c9 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:45:43 +0000 Subject: [PATCH 040/292] modified the summary creation --- backend/src/communities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index 1c670fff1..ec5199150 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -266,7 +266,7 @@ def create_community_summaries(gds, model): summaries = [] with ThreadPoolExecutor() as executor: - futures = [executor.submit(process_community_info, community, community_chain) for _, community in community_info_list.items()] + futures = [executor.submit(process_community_info, community, community_chain) for community in community_info_list.to_dict(orient="records")] for future in as_completed(futures): result = future.result() @@ -282,7 +282,7 @@ def create_community_summaries(gds, model): parent_summaries = [] with ThreadPoolExecutor() as executor: - futures = [executor.submit(process_community_info, community, parent_community_chain, is_parent=True) for _, community in parent_community_info.items()] + futures = [executor.submit(process_community_info, community, parent_community_chain, is_parent=True) for community in parent_community_info.to_dict(orient="records")] for future in as_completed(futures): result = future.result() From c666c368b1ce6c09d06d90f82451fdce7ad56629 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:48:02 +0000 Subject: [PATCH 041/292] fixed the summary creation --- backend/src/communities.py | 1 - 1 file 
changed, 1 deletion(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index ec5199150..7086a4786 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -225,7 +225,6 @@ def get_community_chain(model, is_parent=False,community_template=COMMUNITY_TEMP logging.error(f"Failed to create community chain: {e}") raise - def prepare_string(community_data): try: nodes_description = "Nodes are:\n" From c15932207fdce246ceaa872f40fecb28101061cb Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:55:36 +0000 Subject: [PATCH 042/292] Configuration change. Update LLM models and remove --preload from docker file --- backend/Dockerfile | 2 +- backend/src/document_sources/wikipedia.py | 2 +- backend/src/shared/constants.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index ff46b33e5..5249ac53c 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -21,4 +21,4 @@ RUN pip install -r requirements.txt # Copy application code COPY . 
/code # Set command -CMD ["gunicorn", "score:app", "--workers", "8","--preload","--threads", "8", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "300"] +CMD ["gunicorn", "score:app", "--workers", "8","--threads", "8", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "300"] diff --git a/backend/src/document_sources/wikipedia.py b/backend/src/document_sources/wikipedia.py index 1c8b32a33..71820a69e 100644 --- a/backend/src/document_sources/wikipedia.py +++ b/backend/src/document_sources/wikipedia.py @@ -4,7 +4,7 @@ def get_documents_from_Wikipedia(wiki_query:str, language:str): try: - pages = WikipediaLoader(query=wiki_query.strip(), lang=language, load_max_docs=1, load_all_available_meta=False).load() + pages = WikipediaLoader(query=wiki_query.strip(), lang=language, load_all_available_meta=False).load() file_name = wiki_query.strip() logging.info(f"Total Pages from Wikipedia = {len(pages)}") return file_name, pages diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c5f8e98a4..b67e92730 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,11 +1,11 @@ MODEL_VERSIONS = { - "openai-gpt-3.5": "gpt-3.5-turbo-16k", + "openai-gpt-3.5": "gpt-3.5-turbo-0125", "gemini-1.0-pro": "gemini-1.0-pro-001", "gemini-1.5-pro": "gemini-1.5-pro-preview-0514", - "openai-gpt-4": "gpt-4-0125-preview", - "diffbot" : "gpt-4o", - "openai-gpt-4o-mini": "gpt-4o-mini", - "openai-gpt-4o":"gpt-4o", + "openai-gpt-4": "gpt-4-turbo-2024-04-09", + "diffbot" : "gpt-4-turbo-2024-04-09", + "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", + "openai-gpt-4o":"gpt-4o-2024-08-06", "groq-llama3" : "llama3-70b-8192" } OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] From e08aeab10816f65a606e9fdfa547e6d1b0427a5d Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 4 Sep 2024 
14:22:12 +0530 Subject: [PATCH 043/292] Retry processing (#698) * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Reapply "Dockerfile changes with VITE label" This reverts commit a83e0855fbf54d2b5af009d96c4edf0bcd7ab84a. * Revert "Dockerfile changes with VITE label" This reverts commit 2840ebc9e6156c51465a9f54be72ca2d014147c2. 
* Concurrent processing of files (#665) * Update README.md * Droped the old vector index (#652) * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * Added chatbot "hybrid " mode use case * added the concurrent file processing * page refresh scenario * fixed waiting files processing issue in refresh scenario * removed boolean param * fixed processedCount issue * checkbox with waiting check * fixed the refresh scenario with processing files * processing files check * server side error * processing file count check for processing files less than batch size * processing count check to handle allselected files * created helper functions * code improvements * __ changes (#656) * DiffbotGraphTransformer doesn't need an LLMGraphTransformer (#659) Co-authored-by: jeromechoo * Removed experiments/llm-chatbot-python folder from DEV branch * redcued the password clear timeout * Removed experiments/Cypher_Queries.ipynb file from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * update delete query with entities * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * add BATCH_SIZE to docker * temp fixes for 0.3.1 * alert fix for less than batch size processing * new virtual env * added Hybrid Chat modes (#670) * Rename the function #657 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * Graph node filename check * env fixes with latest nvl libraries * format fixes * removed local files * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without 
embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes 
* chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * page refresh scenario * fixed waiting files processing issue in refresh scenario * Removed experiments/llm-chatbot-python folder from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes 
* env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for 
create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * property spell fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env changes * format fixes * set retry status * retry processing backend * added the retry icon on rows * vite 
changes in docker compose * added retry dialog * Integrated the Retry processing API * Integrated the Extract API fro retry processing * Integrated ndl toast component * replaced foreach with normal for loop for better performance * types improvements * used toast component * spell fix * Issue fixed * processing changes in main * function closing fix * retry processing issue fixed * autoclosing the retry popup on retry api success * removed the retry if check * resetting the node and relationship count on retry * added the enter key events on the popups * fixed wikipedia icon on large file alert popup * setting nodes to 0 and start from last processed chunk logic changes * Retry Popup fixes * status changes for upload failed scenario * kept condition specific * changed status to reprocess from retry * Reprocess wording changes * tooltip changes * wordings and size changes * Changed status to Reprocess --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> --- backend/score.py | 33 +- backend/src/entities/source_node.py | 1 + backend/src/graphDB_dataAccess.py | 11 +- backend/src/main.py | 218 ++++++++---- backend/src/shared/constants.py | 39 +++ frontend/example.env | 2 +- frontend/src/App.tsx | 4 +- .../src/components/ChatBot/ChatInfoModal.tsx | 5 +- frontend/src/components/ChatBot/Chatbot.tsx | 10 +- .../src/components/ChatBot/Info/InfoModal.tsx | 5 +- frontend/src/components/Content.tsx | 319 ++++++++++-------- .../components/DataSources/AWS/S3Modal.tsx | 119 ++++--- 
.../components/DataSources/GCS/GCSModal.tsx | 180 +++++----- .../components/DataSources/Local/DropZone.tsx | 61 +--- .../Local/DropZoneForSmallLayouts.tsx | 59 ++-- frontend/src/components/FileTable.tsx | 215 +++++------- frontend/src/components/Layout/PageLayout.tsx | 35 +- .../DeleteTabForOrphanNodes/index.tsx | 5 +- .../EntityExtractionSetting.tsx | 22 +- .../Popups/GraphEnhancementDialog/index.tsx | 6 - .../LargeFilePopUp/ConfirmationDialog.tsx | 5 +- .../Popups/LargeFilePopUp/LargeFilesAlert.tsx | 3 +- .../Popups/RetryConfirmation/Index.tsx | 84 +++++ .../Popups/Settings/SchemaFromText.tsx | 22 +- .../Popups/Settings/SettingModal.tsx | 16 +- .../WebSources/CustomSourceInput.tsx | 5 + frontend/src/hooks/useSourceInput.tsx | 65 ++-- frontend/src/services/CommonAPI.ts | 2 +- frontend/src/services/retry.ts | 23 ++ frontend/src/types.ts | 85 ++--- frontend/src/utils/Constants.ts | 6 +- frontend/src/utils/FileAPI.ts | 7 + frontend/src/utils/Utils.ts | 50 ++- frontend/src/utils/toasts.ts | 22 ++ frontend/yarn.lock | 1 + 35 files changed, 1006 insertions(+), 739 deletions(-) create mode 100644 frontend/src/components/Popups/RetryConfirmation/Index.tsx create mode 100644 frontend/src/services/retry.ts create mode 100644 frontend/src/utils/toasts.ts diff --git a/backend/score.py b/backend/score.py index 00650b56e..08ea9a6b2 100644 --- a/backend/score.py +++ b/backend/score.py @@ -137,7 +137,8 @@ async def extract_knowledge_graph_from_file( allowedNodes=Form(None), allowedRelationship=Form(None), language=Form(None), - access_token=Form(None) + access_token=Form(None), + retry_condition=Form(None) ): """ Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a @@ -161,30 +162,30 @@ async def extract_knowledge_graph_from_file( merged_file_path = os.path.join(MERGED_DIR,file_name) logging.info(f'File path:{merged_file_path}') result = await asyncio.to_thread( - extract_graph_from_file_local_file, uri, userName, password, database, model, 
merged_file_path, file_name, allowedNodes, allowedRelationship) + extract_graph_from_file_local_file, uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 's3 bucket' and source_url: result = await asyncio.to_thread( - extract_graph_from_file_s3, uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, allowedNodes, allowedRelationship) + extract_graph_from_file_s3, uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'web-url': result = await asyncio.to_thread( - extract_graph_from_web_page, uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship) + extract_graph_from_web_page, uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'youtube' and source_url: result = await asyncio.to_thread( - extract_graph_from_file_youtube, uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship) + extract_graph_from_file_youtube, uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'Wikipedia' and wiki_query: result = await asyncio.to_thread( - extract_graph_from_file_Wikipedia, uri, userName, password, database, model, wiki_query, max_sources, language, allowedNodes, allowedRelationship) + extract_graph_from_file_Wikipedia, uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'gcs bucket' and gcs_bucket_name: result = await asyncio.to_thread( - extract_graph_from_file_gcs, uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, 
allowedNodes, allowedRelationship) + extract_graph_from_file_gcs, uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) else: return create_api_response('Failed',message='source_type is other than accepted source') - + if result is not None: result['db_url'] = uri result['api_name'] = 'extract' @@ -626,6 +627,22 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da return create_api_response(job_status, message=message, error=error_message) finally: gc.collect() + +@app.post("/retry_processing") +async def retry_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), file_name=Form(), retry_condition=Form()): + try: + graph = create_graph_database_connection(uri, userName, password, database) + await asyncio.to_thread(set_status_retry, graph,file_name,retry_condition) + #set_status_retry(graph,file_name,retry_condition) + return create_api_response('Success',message=f"Status set to Reprocess for filename : {file_name}") + except Exception as e: + job_status = "Failed" + message="Unable to set status to Retry" + error_message = str(e) + logging.exception(f'{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/entities/source_node.py b/backend/src/entities/source_node.py index aa0e73faa..1a1c70487 100644 --- a/backend/src/entities/source_node.py +++ b/backend/src/entities/source_node.py @@ -24,3 +24,4 @@ class sourceNode: is_cancelled:bool=None processed_chunk:int=None access_token:str=None + retry_condition:str=None diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index c77a1e773..c1eaaa190 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -71,10 +71,10 @@ def 
update_source_node(self, obj_source_node:sourceNode): if obj_source_node.processing_time is not None and obj_source_node.processing_time != 0: params['processingTime'] = round(obj_source_node.processing_time.total_seconds(),2) - if obj_source_node.node_count is not None and obj_source_node.node_count != 0: + if obj_source_node.node_count is not None : params['nodeCount'] = obj_source_node.node_count - if obj_source_node.relationship_count is not None and obj_source_node.relationship_count != 0: + if obj_source_node.relationship_count is not None : params['relationshipCount'] = obj_source_node.relationship_count if obj_source_node.model is not None and obj_source_node.model != '': @@ -86,11 +86,14 @@ def update_source_node(self, obj_source_node:sourceNode): if obj_source_node.total_chunks is not None and obj_source_node.total_chunks != 0: params['total_chunks'] = obj_source_node.total_chunks - if obj_source_node.is_cancelled is not None and obj_source_node.is_cancelled != False: + if obj_source_node.is_cancelled is not None: params['is_cancelled'] = obj_source_node.is_cancelled - if obj_source_node.processed_chunk is not None and obj_source_node.processed_chunk != 0: + if obj_source_node.processed_chunk is not None : params['processed_chunk'] = obj_source_node.processed_chunk + + if obj_source_node.retry_condition is not None : + params['retry_condition'] = obj_source_node.retry_condition param= {"props":params} diff --git a/backend/src/main.py b/backend/src/main.py index a7d5058a0..3a88f1eb3 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1,5 +1,11 @@ from langchain_community.graphs import Neo4jGraph -from src.shared.constants import BUCKET_UPLOAD, PROJECT_ID +from src.shared.constants import (BUCKET_UPLOAD, PROJECT_ID, QUERY_TO_GET_CHUNKS, + QUERY_TO_DELETE_EXISTING_ENTITIES, + QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, + QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, + START_FROM_BEGINNING, + START_FROM_LAST_PROCESSED_POSITION, + 
DELETE_ENTITIES_AND_START_FROM_BEGINNING) from src.shared.schema_extraction import schema_extraction_from_text from dotenv import load_dotenv from datetime import datetime @@ -182,68 +188,75 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url, 'language':obj_source_node.language, 'status':'Success'}) return lst_file_name,success_count,failed_count -def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship): +def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition): logging.info(f'Process file name :{fileName}') - gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - if gcs_file_cache == 'True': - folder_name = create_gcs_bucket_folder_name_hashed(uri, fileName) - file_name, pages = get_documents_from_gcs( PROJECT_ID, BUCKET_UPLOAD, folder_name, fileName) + if retry_condition is None: + gcs_file_cache = os.environ.get('GCS_FILE_CACHE') + if gcs_file_cache == 'True': + folder_name = create_gcs_bucket_folder_name_hashed(uri, fileName) + file_name, pages = get_documents_from_gcs( PROJECT_ID, BUCKET_UPLOAD, folder_name, fileName) + else: + file_name, pages, file_extension = get_documents_from_file_by_path(merged_file_path,fileName) + if pages==None or len(pages)==0: + raise Exception(f'File content is not available for file : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) else: - file_name, pages, file_extension = get_documents_from_file_by_path(merged_file_path,fileName) - if pages==None or len(pages)==0: - raise Exception(f'File content is not available for file : {file_name}') - - return processing_source(uri, userName, 
password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) - -def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, allowedNodes, allowedRelationship): + return processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition) + +def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition): + if retry_condition is None: + if(aws_access_key_id==None or aws_secret_access_key==None): + raise Exception('Please provide AWS access and secret keys') + else: + logging.info("Insert in S3 Block") + file_name, pages = get_documents_from_s3(source_url, aws_access_key_id, aws_secret_access_key) - if(aws_access_key_id==None or aws_secret_access_key==None): - raise Exception('Please provide AWS access and secret keys') + if pages==None or len(pages)==0: + raise Exception(f'File content is not available for file : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - logging.info("Insert in S3 Block") - file_name, pages = get_documents_from_s3(source_url, aws_access_key_id, aws_secret_access_key) - - if pages==None or len(pages)==0: - raise Exception(f'File content is not available for file : {file_name}') - - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) - -def extract_graph_from_web_page(uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship): - - file_name, pages = get_documents_from_web_page(source_url) - - if pages==None or len(pages)==0: - raise Exception(f'Content is not available for given URL : {file_name}') - - return processing_source(uri, userName, 
password, database, model, file_name, pages, allowedNodes, allowedRelationship) - -def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship): + return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) - file_name, pages = get_documents_from_youtube(source_url) - - if pages==None or len(pages)==0: - raise Exception(f'Youtube transcript is not available for file : {file_name}') - - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) - -def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, max_sources, language, allowedNodes, allowedRelationship): +def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): + if retry_condition is None: + file_name, pages = get_documents_from_web_page(source_url) - file_name, pages = get_documents_from_Wikipedia(wiki_query, language) - if pages==None or len(pages)==0: - raise Exception(f'Wikipedia page is not available for file : {file_name}') - - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) - -def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, allowedNodes, allowedRelationship): - - file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) - if pages==None or len(pages)==0: - raise Exception(f'File content is not available for file : {file_name}') + if pages==None or len(pages)==0: + raise Exception(f'Content is not available for given URL : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, 
pages, allowedNodes, allowedRelationship) + else: + return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + +def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): + if retry_condition is None: + file_name, pages = get_documents_from_youtube(source_url) - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + if pages==None or len(pages)==0: + raise Exception(f'Youtube transcript is not available for file : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + else: + return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + +def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition): + if retry_condition is None: + file_name, pages = get_documents_from_Wikipedia(wiki_query, language) + if pages==None or len(pages)==0: + raise Exception(f'Wikipedia page is not available for file : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + else: + return processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None): +def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, 
allowedRelationship, retry_condition): + if retry_condition is None: + file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) + if pages==None or len(pages)==0: + raise Exception(f'File content is not available for file : {file_name}') + return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + else: + return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + +def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None): """ Extracts a Neo4jGraph from a PDF file based on the model. @@ -263,31 +276,27 @@ def processing_source(uri, userName, password, database, model, file_name, pages graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) + total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) result = graphDb_data_Access.get_current_status_document_node(file_name) - print(result) - logging.info("Break down file into chunks") - bad_chars = ['"', "\n", "'"] - for i in range(0,len(pages)): - text = pages[i].page_content - for j in bad_chars: - if j == '\n': - text = text.replace(j, ' ') - else: - text = text.replace(j, '') - pages[i]=Document(page_content=str(text), metadata=pages[i].metadata) - create_chunks_obj = CreateChunksofDocument(pages, graph) - chunks = create_chunks_obj.split_file_into_chunks() - chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) - + + select_chunks_with_retry=0 + node_count = 0 + rel_count = 0 + if len(result) > 0: if result[0]['Status'] != 'Processing': obj_source_node = sourceNode() status = "Processing" obj_source_node.file_name = file_name 
obj_source_node.status = status - obj_source_node.total_chunks = len(chunks) + obj_source_node.total_chunks = total_chunks obj_source_node.total_pages = len(pages) obj_source_node.model = model + if retry_condition == START_FROM_LAST_PROCESSED_POSITION: + node_count = result[0]['nodeCount'] + rel_count = result[0]['relationshipCount'] + select_chunks_with_retry = result[0]['processed_chunk'] + obj_source_node.processed_chunk = 0+select_chunks_with_retry logging.info(file_name) logging.info(obj_source_node) graphDb_data_Access.update_source_node(obj_source_node) @@ -297,21 +306,22 @@ def processing_source(uri, userName, password, database, model, file_name, pages # selected_chunks = [] is_cancelled_status = False job_status = "Completed" - node_count = 0 - rel_count = 0 + for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): select_chunks_upto = i+update_graph_chunk_processed logging.info(f'Selected Chunks upto: {select_chunks_upto}') if len(chunkId_chunkDoc_list) <= select_chunks_upto: select_chunks_upto = len(chunkId_chunkDoc_list) selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] + + logging.info(f"select_chunks_upto={select_chunks_upto}, update_graph_chunk_processed={update_graph_chunk_processed}, selected_chunks={len(selected_chunks)}") result = graphDb_data_Access.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") if bool(is_cancelled_status) == True: job_status = "Cancelled" logging.info('Exit from running loop of processing file') - exit + break else: node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) end_time = datetime.now() @@ -321,8 +331,8 @@ def processing_source(uri, userName, password, database, model, file_name, pages obj_source_node.file_name = file_name obj_source_node.updated_at = end_time 
obj_source_node.processing_time = processed_time + obj_source_node.processed_chunk = select_chunks_upto+select_chunks_with_retry obj_source_node.node_count = node_count - obj_source_node.processed_chunk = select_chunks_upto obj_source_node.relationship_count = rel_count graphDb_data_Access.update_source_node(obj_source_node) @@ -406,6 +416,47 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab print(f'relation count internal func:{rel_count}') return node_count,rel_count +def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): + if retry_condition is None: + logging.info("Break down file into chunks") + bad_chars = ['"', "\n", "'"] + for i in range(0,len(pages)): + text = pages[i].page_content + for j in bad_chars: + if j == '\n': + text = text.replace(j, ' ') + else: + text = text.replace(j, '') + pages[i]=Document(page_content=str(text), metadata=pages[i].metadata) + create_chunks_obj = CreateChunksofDocument(pages, graph) + chunks = create_chunks_obj.split_file_into_chunks() + chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) + return len(chunks), chunkId_chunkDoc_list + + else: + chunkId_chunkDoc_list=[] + chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename":file_name}) + for chunk in chunks: + chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']}) + chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc}) + + if retry_condition == START_FROM_LAST_PROCESSED_POSITION: + logging.info(f"Retry : start_from_last_processed_position") + starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name}) + if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): + return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + + elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): + starting_chunk = 
graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name}) + return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + + else: + raise Exception(f"All chunks of {file_name} are alreday processed. If you want to re-process, Please start from begnning") + + else: + logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}") + return len(chunks), chunkId_chunkDoc_list + def get_source_list_from_graph(uri,userName,password,db_name=None): """ Args: @@ -561,4 +612,21 @@ def populate_graph_schema_from_text(text, model, is_schema_description_cheked): data (list): list of lebels and relationTypes """ result = schema_extraction_from_text(text, model, is_schema_description_cheked) - return {"labels": result.labels, "relationshipTypes": result.relationshipTypes} \ No newline at end of file + return {"labels": result.labels, "relationshipTypes": result.relationshipTypes} + +def set_status_retry(graph, file_name, retry_condition): + graphDb_data_Access = graphDBdataAccess(graph) + obj_source_node = sourceNode() + status = "Reprocess" + obj_source_node.file_name = file_name + obj_source_node.status = status + obj_source_node.retry_condition = retry_condition + obj_source_node.is_cancelled = False + if retry_condition == DELETE_ENTITIES_AND_START_FROM_BEGINNING or retry_condition == START_FROM_BEGINNING: + obj_source_node.processed_chunk=0 + if retry_condition == DELETE_ENTITIES_AND_START_FROM_BEGINNING: + graph.query(QUERY_TO_DELETE_EXISTING_ENTITIES, params={"filename":file_name}) + obj_source_node.node_count=0 + obj_source_node.relationship_count=0 + logging.info(obj_source_node) + graphDb_data_Access.update_source_node(obj_source_node) \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b67e92730..8bb1c56b1 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -277,3 +277,42 @@ RETURN text, avg_score as 
score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ YOUTUBE_CHUNK_SIZE_SECONDS = 60 + +QUERY_TO_GET_CHUNKS = """ + MATCH (d:Document) + WHERE d.fileName = $filename + WITH d + OPTIONAL MATCH (d)<-[:PART_OF|FIRST_CHUNK]-(c:Chunk) + RETURN c.id as id, c.text as text, c.position as position + """ + +QUERY_TO_DELETE_EXISTING_ENTITIES = """ + MATCH (d:Document {fileName:$filename}) + WITH d + MATCH (d)<-[:PART_OF]-(c:Chunk) + WITH d,c + MATCH (c)-[:HAS_ENTITY]->(e) + WHERE NOT EXISTS { (e)<-[:HAS_ENTITY]-()<-[:PART_OF]-(d2:Document) } + DETACH DELETE e + """ + +QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION=""" + MATCH (d:Document) + WHERE d.fileName = $filename + WITH d + MATCH (c:Chunk) WHERE c.embedding is null + RETURN c.id as id,c.position as position + ORDER BY c.position LIMIT 1 + """ +QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY = """ + MATCH (d:Document) + WHERE d.fileName = $filename + WITH d + MATCH (d)<-[:PART_OF]-(c:Chunk) WHERE NOT exists {(c)-[:HAS_ENTITY]->()} + RETURN c.id as id,c.position as position + ORDER BY c.position LIMIT 1 + """ + +START_FROM_BEGINNING = "start_from_beginning" +DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning" +START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position" diff --git a/frontend/example.env b/frontend/example.env index 63bd3e7c3..9bc5bc0d5 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -8,4 +8,4 @@ VITE_CHUNK_SIZE=5242880 VITE_LARGE_FILE_SIZE=5242880 VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" -VITE_BATCH_SIZE=2 \ No newline at end of file +VITE_BATCH_SIZE=2 diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index f35599b98..df97d4c09 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -5,7 +5,7 @@ import QuickStarter from './components/QuickStarter'; import { GoogleOAuthProvider } from '@react-oauth/google'; import { 
APP_SOURCES } from './utils/Constants'; import ErrorBoundary from './components/UI/ErrroBoundary'; - +import { Toaster } from '@neo4j-ndl/react'; const App: React.FC = () => { return ( <> @@ -14,6 +14,7 @@ const App: React.FC = () => { + @@ -21,6 +22,7 @@ const App: React.FC = () => { + )} diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 89c15ea72..3c6642665 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -131,11 +131,12 @@ const ChatInfoModal: React.FC = ({ }; const labelCounts = useMemo(() => { const counts: { [label: string]: number } = {}; - infoEntities.forEach((entity) => { + for (let index = 0; index < infoEntities.length; index++) { + const entity = infoEntities[index]; const { labels } = entity; const [label] = labels; counts[label] = counts[label] ? counts[label] + 1 : 1; - }); + } return counts; }, [infoEntities]); const sortedLabels = useMemo(() => { diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 448af0e7d..7352b0ff4 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -55,13 +55,15 @@ const Chatbot: FC = (props) => { }, }); let selectedFileNames: CustomFile[] = []; - selectedRows.forEach((id) => { - filesData.forEach((f) => { + for (let index = 0; index < selectedRows.length; index++) { + const id = selectedRows[index]; + for (let index = 0; index < filesData.length; index++) { + const f = filesData[index]; if (f.id === id) { selectedFileNames.push(f); } - }); - }); + } + } const handleInputChange = (e: React.ChangeEvent) => { setInputMessage(e.target.value); diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx index cf1bbca47..0d5b82a64 100644 --- a/frontend/src/components/ChatBot/Info/InfoModal.tsx +++ 
b/frontend/src/components/ChatBot/Info/InfoModal.tsx @@ -77,11 +77,12 @@ const InfoModal: React.FC = ({ sources, model, total_tokens, re }; const labelCounts = useMemo(() => { const counts: { [label: string]: number } = {}; - infoEntities.forEach((entity) => { + for (let index = 0; index < infoEntities.length; index++) { + const entity = infoEntities[index]; const { labels } = entity; const [label] = labels; counts[label] = counts[label] ? counts[label] + 1 : 1; - }); + } return counts; }, [infoEntities]); const sortedLabels = useMemo(() => { diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 44e7fd325..877c1178b 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -1,17 +1,16 @@ -import { useEffect, useState, useMemo, useRef, Suspense } from 'react'; +import { useEffect, useState, useMemo, useRef, Suspense, useReducer, useCallback } from 'react'; import FileTable from './FileTable'; import { Button, Typography, Flex, StatusIndicator, useMediaQuery } from '@neo4j-ndl/react'; import { useCredentials } from '../context/UserCredentials'; import { useFileContext } from '../context/UsersFiles'; -import CustomAlert from './UI/Alert'; import { extractAPI } from '../utils/FileAPI'; import { + BannerAlertProps, ChildRef, ContentProps, CustomFile, OptionType, UserCredentials, - alertStateType, connectionState, } from '../types'; import deleteAPI from '../services/DeleteFiles'; @@ -19,22 +18,24 @@ import { postProcessing } from '../services/PostProcessing'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; import { useSearchParams } from 'react-router-dom'; -import { batchSize, buttonCaptions, defaultLLM, largeFileSize, llms, tooltips } from '../utils/Constants'; +import { batchSize, buttonCaptions, defaultLLM, largeFileSize, llms, RETRY_OPIONS, tooltips } from '../utils/Constants'; import ButtonWithToolTip from 
'./UI/ButtonWithToolTip'; import connectAPI from '../services/ConnectAPI'; import DropdownComponent from './Dropdown'; import GraphViewModal from './Graph/GraphViewModal'; -import { OverridableStringUnion } from '@mui/types'; -import { AlertColor, AlertPropsColorOverrides } from '@mui/material'; import { lazy } from 'react'; import FallBackDialog from './UI/FallBackDialog'; import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; +import RetryConfirmationDialog from './Popups/RetryConfirmation/Index'; +import retry from '../services/retry'; +import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; import axios from 'axios'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); + let afterFirstRender = false; const Content: React.FC = ({ @@ -60,6 +61,14 @@ const Content: React.FC = ({ const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); + const [retryFile, setRetryFile] = useState(''); + const [retryLoading, setRetryLoading] = useState(false); + const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false); + const [alertStateForRetry, setAlertStateForRetry] = useState({ + showAlert: false, + alertType: 'neutral', + alertMessage: '', + }); const { filesData, @@ -80,39 +89,18 @@ const Content: React.FC = ({ const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); - const [alertDetails, setalertDetails] = useState({ - showAlert: false, - alertType: 'error', - alertMessage: '', - }); + const { 
updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { - setalertDetails({ - showAlert: true, - alertType: 'info', - alertMessage: `${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`, - }); + showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`); localStorage.setItem('alertShown', JSON.stringify(true)); }, (fileName) => { - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: `${fileName} Failed to process`, - }); + showErrorToast(`${fileName} Failed to process`); } ); const childRef = useRef(null); - const showAlert = ( - alertmsg: string, - alerttype: OverridableStringUnion | undefined - ) => { - setalertDetails({ - showAlert: true, - alertMessage: alertmsg, - alertType: alerttype, - }); - }; + useEffect(() => { if (!init && !searchParams.has('connectURL')) { let session = localStorage.getItem('neo4j.connection'); @@ -158,6 +146,57 @@ const Content: React.FC = ({ afterFirstRender = true; }, [queue.items.length, userCredentials]); + useEffect(() => { + const storedSchema = localStorage.getItem('isSchema'); + if (storedSchema !== null) { + setIsSchema(JSON.parse(storedSchema)); + } + }, [isSchema]); + + useEffect(() => { + const connection = localStorage.getItem('neo4j.connection'); + if (connection != null) { + (async () => { + const parsedData = JSON.parse(connection); + console.log(parsedData.uri); + const response = await connectAPI(parsedData.uri, parsedData.user, parsedData.password, parsedData.database); + if (response?.data?.status === 'Success') { + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + ...parsedData, + userDbVectorIndex: response.data.data.db_vector_dimension, + }) + ); + if ( + (response.data.data.application_dimension === response.data.data.db_vector_dimension || + response.data.data.db_vector_dimension == 0) && + !response.data.data.chunks_exists + ) { + setConnectionStatus(true); + setOpenConnection((prev) => ({ ...prev, 
openPopUp: false })); + } else { + setOpenConnection({ + openPopUp: true, + chunksExists: response.data.data.chunks_exists as boolean, + vectorIndexMisMatch: + response.data.data.db_vector_dimension > 0 && + response.data.data.db_vector_dimension != response.data.data.application_dimension, + chunksExistsWithDifferentDimension: + response.data.data.db_vector_dimension > 0 && + response.data.data.db_vector_dimension != response.data.data.application_dimension && + (response.data.data.chunks_exists ?? true), + }); + setConnectionStatus(false); + } + } else { + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + setConnectionStatus(false); + } + })(); + } + }, []); + const handleDropdownChange = (selectedOption: OptionType | null | void) => { if (selectedOption?.value) { setModel(selectedOption?.value); @@ -219,6 +258,7 @@ const Content: React.FC = ({ fileItem.model, userCredentials as UserCredentials, fileItem.fileSource, + fileItem.retryOption ?? '', fileItem.source_url, localStorage.getItem('accesskey'), localStorage.getItem('secretkey'), @@ -267,11 +307,7 @@ const Content: React.FC = ({ const { message, fileName } = error; queue.remove(fileName); const errorMessage = error.message; - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: message, - }); + showErrorToast(message); setFilesData((prevfiles) => prevfiles.map((curfile) => { if (curfile.name == fileName) { @@ -304,14 +340,10 @@ const Content: React.FC = ({ newCheck: boolean ) => { const data = []; - setalertDetails({ - showAlert: true, - alertMessage: `Processing ${batch.length} files at a time.`, - alertType: 'info', - }); + showNormalToast(`Processing ${batch.length} files at a time.`); for (let i = 0; i < batch.length; i++) { if (newCheck) { - if (batch[i]?.status === 'New') { + if (batch[i]?.status === 'New' || batch[i].status === 'Reprocess') { data.push(extractData(batch[i].id, isSelectedFiles, selectedFiles as CustomFile[])); } } else { @@ -322,7 +354,8 @@ const 
Content: React.FC = ({ }; const addFilesToQueue = (remainingFiles: CustomFile[]) => { - remainingFiles.forEach((f) => { + for (let index = 0; index < remainingFiles.length; index++) { + const f = remainingFiles[index]; setFilesData((prev) => prev.map((pf) => { if (pf.id === f.id) { @@ -335,7 +368,7 @@ const Content: React.FC = ({ }) ); queue.enqueue(f); - }); + } }; const scheduleBatchWiseProcess = (selectedRows: CustomFile[], isSelectedFiles: boolean) => { @@ -427,13 +460,12 @@ const Content: React.FC = ({ await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); } else { - const selectedNewFiles = childRef.current?.getSelectedRows().filter((f) => f.status === 'New'); + const selectedNewFiles = childRef.current + ?.getSelectedRows() + .filter((f) => f.status === 'New' || f.status == 'Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } } - const handleClose = () => { - setalertDetails((prev) => ({ ...prev, showAlert: false, alertMessage: '' })); - }; const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; @@ -470,13 +502,48 @@ const Content: React.FC = ({ setSelectedRels([]); }; + const retryHandler = async (filename: string, retryoption: string) => { + try { + setRetryLoading(true); + const response = await retry(userCredentials as UserCredentials, filename, retryoption); + setRetryLoading(false); + if (response.data.status === 'Failure') { + throw new Error(response.data.error); + } + setFilesData((prev) => { + return prev.map((f) => { + return f.name === filename + ? { + ...f, + status: 'Reprocess', + processingProgress: retryoption.includes('start_from_beginning') ? 0 : f.processingProgress, + NodesCount: retryoption === RETRY_OPIONS[1] ? 0 : f.NodesCount, + relationshipCount: retryoption === RETRY_OPIONS[1] ? 
0 : f.relationshipCount, + } + : f; + }); + }); + showSuccessToast(response.data.message as string); + retryOnclose(); + } catch (error) { + setRetryLoading(false); + if (error instanceof Error) { + setAlertStateForRetry({ + showAlert: true, + alertMessage: error.message, + alertType: 'danger', + }); + } + } + }; + const selectedfileslength = useMemo( () => childRef.current?.getSelectedRows().length, [childRef.current?.getSelectedRows()] ); const newFilecheck = useMemo( - () => childRef.current?.getSelectedRows().filter((f) => f.status === 'New').length, + () => childRef.current?.getSelectedRows().filter((f) => f.status === 'New' || f.status == 'Reprocess').length, [childRef.current?.getSelectedRows()] ); @@ -486,7 +553,7 @@ const Content: React.FC = ({ ); const dropdowncheck = useMemo( - () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting'), + () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting' || f.status === 'Reprocess'), [filesData] ); @@ -502,15 +569,16 @@ const Content: React.FC = ({ const filesForProcessing = useMemo(() => { let newstatusfiles: CustomFile[] = []; - if (childRef.current?.getSelectedRows().length) { - childRef.current?.getSelectedRows().forEach((f) => { - const parsedFile: CustomFile = f; - if (parsedFile.status === 'New') { + const selectedRows = childRef.current?.getSelectedRows(); + if (selectedRows?.length) { + for (let index = 0; index < selectedRows.length; index++) { + const parsedFile: CustomFile = selectedRows[index]; + if (parsedFile.status === 'New' || parsedFile.status == 'Reprocess') { newstatusfiles.push(parsedFile); } - }); + } } else if (filesData.length) { - newstatusfiles = filesData.filter((f) => f.status === 'New'); + newstatusfiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); } return newstatusfiles; }, [filesData, childRef.current?.getSelectedRows()]); @@ -528,15 +596,14 @@ const Content: React.FC = ({ setRowSelection({}); setdeleteLoading(false); if 
(response.data.status == 'Success') { - setalertDetails({ - showAlert: true, - alertMessage: response.data.message, - alertType: 'success', - }); + showSuccessToast(response.data.message); const filenames = childRef.current?.getSelectedRows().map((str) => str.name); - filenames?.forEach((name) => { - setFilesData((prev) => prev.filter((f) => f.name != name)); - }); + if (filenames?.length) { + for (let index = 0; index < filenames.length; index++) { + const name = filenames[index]; + setFilesData((prev) => prev.filter((f) => f.name != name)); + } + } } else { let errorobj = { error: response.data.error, message: response.data.message }; throw new Error(JSON.stringify(errorobj)); @@ -547,94 +614,41 @@ const Content: React.FC = ({ if (err instanceof Error) { const error = JSON.parse(err.message); const { message } = error; - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: message, - }); + showErrorToast(message); console.log(err); } } setshowDeletePopUp(false); }; - useEffect(() => { - const connection = localStorage.getItem('neo4j.connection'); - if (connection != null) { - (async () => { - const parsedData = JSON.parse(connection); - console.log(parsedData.uri); - const response = await connectAPI(parsedData.uri, parsedData.user, parsedData.password, parsedData.database); - if (response?.data?.status === 'Success') { - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - ...parsedData, - userDbVectorIndex: response.data.data.db_vector_dimension, - }) - ); - if ( - (response.data.data.application_dimension === response.data.data.db_vector_dimension || - response.data.data.db_vector_dimension == 0) && - !response.data.data.chunks_exists - ) { - setConnectionStatus(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: false })); - } else { - setOpenConnection({ - openPopUp: true, - chunksExists: response.data.data.chunks_exists as boolean, - vectorIndexMisMatch: - response.data.data.db_vector_dimension > 0 && - 
response.data.data.db_vector_dimension != response.data.data.application_dimension, - chunksExistsWithDifferentDimension: - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension && - (response.data.data.chunks_exists ?? true), - }); - setConnectionStatus(false); - } - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setConnectionStatus(false); - } - })(); - } - }, []); - - useEffect(() => { - const storedSchema = localStorage.getItem('isSchema'); - if (storedSchema !== null) { - setIsSchema(JSON.parse(storedSchema)); - } - }, [isSchema]); const onClickHandler = () => { - if (childRef.current?.getSelectedRows().length) { + const selectedRows = childRef.current?.getSelectedRows(); + if (selectedRows?.length) { let selectedLargeFiles: CustomFile[] = []; - childRef.current?.getSelectedRows().forEach((f) => { - const parsedData: CustomFile = f; + for (let index = 0; index < selectedRows.length; index++) { + const parsedData: CustomFile = selectedRows[index]; if ( parsedData.fileSource === 'local file' && typeof parsedData.size === 'number' && - parsedData.status === 'New' && + (parsedData.status === 'New' || parsedData.status == 'Reprocess') && parsedData.size > largeFileSize ) { selectedLargeFiles.push(parsedData); } - }); + } if (selectedLargeFiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(childRef.current?.getSelectedRows().filter((f) => f.status === 'New')); + handleGenerateGraph(selectedRows.filter((f) => f.status === 'New' || f.status === 'Reprocess')); } } else if (filesData.length) { const largefiles = filesData.filter((f) => { - if (typeof f.size === 'number' && f.status === 'New' && f.size > largeFileSize) { + if (typeof f.size === 'number' && (f.status === 'New' || f.status == 'Reprocess') && f.size > largeFileSize) { return true; } return false; }); - const selectAllNewFiles = filesData.filter((f) => f.status === 'New'); + 
const selectAllNewFiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); const stringified = selectAllNewFiles.reduce((accu, f) => { const key = f.id; // @ts-ignore @@ -645,29 +659,41 @@ const Content: React.FC = ({ if (largefiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(filesData.filter((f) => f.status === 'New')); + handleGenerateGraph(filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess')); } } }; + const retryOnclose = useCallback(() => { + setRetryFile(''); + setAlertStateForRetry({ + showAlert: false, + alertMessage: '', + alertType: 'neutral', + }); + setRetryLoading(false); + toggleRetryPopup(); + }, []); + + const onBannerClose = useCallback(() => { + setAlertStateForRetry({ + showAlert: false, + alertMessage: '', + alertType: 'neutral', + }); + }, []); + return ( <> - {alertDetails.showAlert && ( - - )} - {isSchema && ( - - )} + {showConfirmationModal && filesForProcessing.length && ( }> = ({ open={showEnhancementDialog} onClose={toggleEnhancementDialog} closeSettingModal={closeSettingModal} - showAlert={showAlert} > )}
@@ -777,6 +802,10 @@ const Content: React.FC = ({ setOpenGraphView(true); setViewPoint('tableView'); }} + onRetry={(id) => { + setRetryFile(id); + toggleRetryPopup(); + }} ref={childRef} handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index 2f40c9709..221fb0c9c 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -38,6 +38,8 @@ const S3Modal: React.FC = ({ hideModal, open }) => { model: model, fileSource: 's3 bucket', processingProgress: undefined, + retryOption: '', + retryOptionStatus: false, }; if (url) { setValid(validation(bucketUrl) && isFocused); @@ -123,7 +125,19 @@ const S3Modal: React.FC = ({ hideModal, open }) => { reset(); setStatus('unknown'); }; - + const handleKeyDown: React.KeyboardEventHandler = (e) => { + if (e.code === 'Enter') { + e.preventDefault(); // + // @ts-ignore + const { form } = e.target; + const index = Array.prototype.indexOf.call(form, e.target); + if (index + 1 < form.elements.length) { + form.elements[index + 1].focus(); + } else { + submitHandler(bucketUrl); + } + } + }; return ( = ({ hideModal, open }) => { submitLabel={buttonCaptions.submit} >
- setValid(validation(bucketUrl) && isFocused)} - onChange={(e) => { - setisFocused(true); - setBucketUrl(e.target.value); - }} - /> -
-
- { - setAccessKey(e.target.value); - }} - /> - { - setSecretKey(e.target.value); - }} - /> +
+ setValid(validation(bucketUrl) && isFocused)} + onChange={(e) => { + setisFocused(true); + setBucketUrl(e.target.value); + }} + onKeyDown={handleKeyDown} + /> +
+ { + setAccessKey(e.target.value); + }} + onKeyDown={handleKeyDown} + /> + { + setSecretKey(e.target.value); + }} + onKeyDown={handleKeyDown} + /> +
+
); diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index a4cd3d8f9..007ffab2b 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -9,6 +9,7 @@ import CustomModal from '../../../HOC/CustomModal'; import { useGoogleLogin } from '@react-oauth/google'; import { useAlertContext } from '../../../context/Alert'; import { buttonCaptions } from '../../../utils/Constants'; +import { showErrorToast, showNormalToast } from '../../../utils/toasts'; const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => { const [bucketName, setbucketName] = useState(''); @@ -30,6 +31,8 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => model: model, fileSource: 'gcs bucket', processingProgress: undefined, + retryOption: '', + retryOptionStatus: false, }; const reset = () => { @@ -67,7 +70,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => access_token: codeResponse.access_token, }); if (apiResponse.data.status == 'Failed' || !apiResponse.data) { - showAlert('error', apiResponse?.data?.message); + showErrorToast(apiResponse?.data?.message); setTimeout(() => { setStatus('unknown'); reset(); @@ -77,52 +80,54 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => } const apiResCheck = apiResponse?.data?.success_count && apiResponse.data.failed_count; if (apiResCheck) { - showAlert( - 'info', + showNormalToast( `Successfully Created Source Nodes for ${apiResponse.data.success_count} and Failed for ${apiResponse.data.failed_count} Files` ); } else if (apiResponse?.data?.success_count) { - showAlert('info', `Successfully Created Source Nodes for ${apiResponse.data.success_count} Files`); + showNormalToast(`Successfully Created Source Nodes for ${apiResponse.data.success_count} Files`); } else if (apiResponse.data.failed_count) { - showAlert('error', `Failed to Created Source Node for 
${apiResponse.data.failed_count} Files`); + showErrorToast(`Failed to Created Source Node for ${apiResponse.data.failed_count} Files`); } else { - showAlert('error', `Invalid Folder Name`); + showErrorToast(`Invalid Folder Name`); } const copiedFilesData = [...filesData]; - apiResponse?.data?.file_name?.forEach((item: fileName) => { - const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item.fileName); - if (filedataIndex == -1) { - copiedFilesData.unshift({ - name: item.fileName, - size: item.fileSize ?? 0, - gcsBucket: item.gcsBucketName, - gcsBucketFolder: item.gcsBucketFolder, - google_project_id: item.gcsProjectId, - id: uuidv4(), - access_token: codeResponse.access_token, - ...defaultValues, - }); - } else { - const tempFileData = copiedFilesData[filedataIndex]; - copiedFilesData.splice(filedataIndex, 1); - copiedFilesData.unshift({ - ...tempFileData, - status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, - model: defaultValues.model, - fileSource: defaultValues.fileSource, - processingProgress: defaultValues.processingProgress, - access_token: codeResponse.access_token, - }); + if (apiResponse?.data?.file_name?.length) { + for (let index = 0; index < apiResponse?.data?.file_name.length; index++) { + const item: fileName = apiResponse?.data?.file_name[index]; + const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item.fileName); + if (filedataIndex == -1) { + copiedFilesData.unshift({ + name: item.fileName, + size: item.fileSize ?? 
0, + gcsBucket: item.gcsBucketName, + gcsBucketFolder: item.gcsBucketFolder, + google_project_id: item.gcsProjectId, + id: uuidv4(), + access_token: codeResponse.access_token, + ...defaultValues, + }); + } else { + const tempFileData = copiedFilesData[filedataIndex]; + copiedFilesData.splice(filedataIndex, 1); + copiedFilesData.unshift({ + ...tempFileData, + status: defaultValues.status, + NodesCount: defaultValues.NodesCount, + relationshipCount: defaultValues.relationshipCount, + processing: defaultValues.processing, + model: defaultValues.model, + fileSource: defaultValues.fileSource, + processingProgress: defaultValues.processingProgress, + access_token: codeResponse.access_token, + }); + } } - }); + } setFilesData(copiedFilesData); reset(); } catch (error) { if (showAlert != undefined) { - showAlert('error', 'Some Error Occurred or Please Check your Instance Connection'); + showNormalToast('Some Error Occurred or Please Check your Instance Connection'); } } setTimeout(() => { @@ -131,15 +136,14 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => }, 500); }, onError: (errorResponse) => { - showAlert( - 'error', + showErrorToast( errorResponse.error_description ?? 
'Some Error Occurred or Please try signin with your google account' ); }, scope: 'https://www.googleapis.com/auth/devstorage.read_only', onNonOAuthError: (error: nonoautherror) => { console.log(error); - showAlert('info', error.message as string); + showNormalToast(error.message as string); }, }); @@ -163,6 +167,19 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => reset(); setStatus('unknown'); }, []); + const handleKeyPress: React.KeyboardEventHandler = (e) => { + if (e.code === 'Enter') { + e.preventDefault(); // + // @ts-ignore + const { form } = e.target; + const index = Array.prototype.indexOf.call(form, e.target); + if (index + 1 < form.elements.length) { + form.elements[index + 1].focus(); + } else { + submitHandler(); + } + } + }; return ( = ({ hideModal, open, openGCSModal }) => submitLabel={buttonCaptions.submit} >
- { - setprojectId(e.target.value); - }} - > - { - setbucketName(e.target.value); - }} - /> - { - setFolderName(e.target.value); - }} - /> +
+ { + setprojectId(e.target.value); + }} + onKeyDown={handleKeyPress} + > + { + setbucketName(e.target.value); + }} + onKeyDown={handleKeyPress} + /> + { + setFolderName(e.target.value); + }} + onKeyDown={handleKeyPress} + /> +
); diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index 63ff08e37..7ff5d90d2 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -1,26 +1,21 @@ import { Dropzone, Flex, Typography } from '@neo4j-ndl/react'; -import React, { useState, useEffect, FunctionComponent } from 'react'; +import { useState, useEffect, FunctionComponent } from 'react'; import Loader from '../../../utils/Loader'; import { v4 as uuidv4 } from 'uuid'; import { useCredentials } from '../../../context/UserCredentials'; import { useFileContext } from '../../../context/UsersFiles'; -import CustomAlert from '../../UI/Alert'; -import { CustomFile, CustomFileBase, UserCredentials, alertStateType } from '../../../types'; +import { CustomFile, CustomFileBase, UserCredentials } from '../../../types'; import { buttonCaptions, chunkSize } from '../../../utils/Constants'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; import IconButtonWithToolTip from '../../UI/IconButtonToolTip'; import { uploadAPI } from '../../../utils/FileAPI'; +import { showErrorToast, showSuccessToast } from '../../../utils/toasts'; const DropZone: FunctionComponent = () => { const { filesData, setFilesData, model } = useFileContext(); const [isLoading, setIsLoading] = useState(false); const [isClicked, setIsClicked] = useState(false); const { userCredentials } = useCredentials(); - const [alertDetails, setalertDetails] = React.useState({ - showAlert: false, - alertType: 'error', - alertMessage: '', - }); const [selectedFiles, setSelectedFiles] = useState([]); const onDropHandler = (f: Partial[]) => { @@ -37,11 +32,13 @@ const DropZone: FunctionComponent = () => { fileSource: 'local file', uploadprogess: 0, processingProgress: undefined, + retryOptionStatus: false, + retryOption: '', }; const copiedFilesData: CustomFile[] = [...filesData]; - - 
f.forEach((file) => { + for (let index = 0; index < f.length; index++) { + const file = f[index]; const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === file?.name); if (filedataIndex == -1) { copiedFilesData.unshift({ @@ -67,22 +64,19 @@ const DropZone: FunctionComponent = () => { processingProgress: defaultValues.processingProgress, }); } - }); + } setFilesData(copiedFilesData); } }; - const handleClose = () => { - setalertDetails((prev) => ({ ...prev, showAlert: false, alertMessage: '' })); - }; - useEffect(() => { if (selectedFiles.length > 0) { - selectedFiles.forEach((file, uid) => { - if (filesData[uid]?.status == 'None' && isClicked) { + for (let index = 0; index < selectedFiles.length; index++) { + const file = selectedFiles[index]; + if (filesData[index]?.status == 'None' && isClicked) { uploadFileInChunks(file); } - }); + } } }, [selectedFiles]); const uploadFileInChunks = (file: File) => { @@ -162,17 +156,15 @@ const DropZone: FunctionComponent = () => { } } catch (error) { setIsLoading(false); - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: 'Error Occurred', - }); + if (error instanceof Error) { + showErrorToast(`Error Occurred: ${error.message}`, true); + } setFilesData((prevfiles) => prevfiles.map((curfile) => { if (curfile.name == file.name) { return { ...curfile, - status: 'Failed', + status: 'Upload Failed', type: `${file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length).toUpperCase()}`, }; } @@ -195,11 +187,7 @@ const DropZone: FunctionComponent = () => { ); setIsClicked(false); setIsLoading(false); - setalertDetails({ - showAlert: true, - alertType: 'success', - alertMessage: `${file.name} uploaded successfully`, - }); + showSuccessToast(`${file.name} uploaded successfully`); } }; @@ -208,15 +196,6 @@ const DropZone: FunctionComponent = () => { return ( <> - {alertDetails.showAlert && ( - - )} - } isTesting={true} @@ -262,11 +241,7 @@ const DropZone: FunctionComponent 
= () => { }, onDropRejected: (e) => { if (e.length) { - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: 'Failed To Upload, Unsupported file extention', - }); + showErrorToast('Failed To Upload, Unsupported file extention'); } }, }} diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 06a16b639..21bd47240 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -3,36 +3,31 @@ import { useDropzone } from 'react-dropzone'; import { useFileContext } from '../../../context/UsersFiles'; import { useEffect, useState } from 'react'; import { useCredentials } from '../../../context/UserCredentials'; -import { alertStateType, CustomFile, CustomFileBase, UserCredentials } from '../../../types'; +import { CustomFile, CustomFileBase, UserCredentials } from '../../../types'; import { chunkSize } from '../../../utils/Constants'; import { uploadAPI } from '../../../utils/FileAPI'; -import CustomAlert from '../../UI/Alert'; import { v4 as uuidv4 } from 'uuid'; import { LoadingSpinner } from '@neo4j-ndl/react'; +import { showErrorToast, showSuccessToast } from '../../../utils/toasts'; export default function DropZoneForSmallLayouts() { const { filesData, setFilesData, model } = useFileContext(); const [isLoading, setIsLoading] = useState(false); const [isClicked, setIsClicked] = useState(false); const { userCredentials } = useCredentials(); - const [alertDetails, setalertDetails] = useState({ - showAlert: false, - alertType: 'error', - alertMessage: '', - }); + const [selectedFiles, setSelectedFiles] = useState([]); useEffect(() => { if (selectedFiles.length > 0) { - selectedFiles.forEach((file, uid) => { - if (filesData[uid]?.status == 'None' && isClicked) { + for (let index = 0; index < selectedFiles.length; index++) { + const file = 
selectedFiles[index]; + if (filesData[index]?.status == 'None' && isClicked) { uploadFileInChunks(file); } - }); + } } }, [selectedFiles]); - const handleClose = () => { - setalertDetails((prev) => ({ ...prev, showAlert: false, alertMessage: '' })); - }; + const uploadFileInChunks = (file: File) => { const totalChunks = Math.ceil(file.size / chunkSize); const chunkProgressIncrement = 100 / totalChunks; @@ -110,17 +105,15 @@ export default function DropZoneForSmallLayouts() { } } catch (error) { setIsLoading(false); - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: 'Error Occurred', - }); + if (error instanceof Error) { + showErrorToast('Error Occurred'); + } setFilesData((prevfiles) => prevfiles.map((curfile) => { if (curfile.name == file.name) { return { ...curfile, - status: 'Failed', + status: 'Upload Failed', type: `${file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length).toUpperCase()}`, }; } @@ -143,11 +136,7 @@ export default function DropZoneForSmallLayouts() { ); setIsClicked(false); setIsLoading(false); - setalertDetails({ - showAlert: true, - alertType: 'success', - alertMessage: `${file.name} uploaded successfully`, - }); + showSuccessToast(`${file.name} uploaded successfully`); } }; @@ -169,11 +158,7 @@ export default function DropZoneForSmallLayouts() { }, onDropRejected: (e) => { if (e.length) { - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: 'Failed To Upload, Unsupported file extention', - }); + showErrorToast('Failed To Upload, Unsupported file extention'); } }, disabled: isLoading, @@ -193,11 +178,13 @@ export default function DropZoneForSmallLayouts() { fileSource: 'local file', uploadprogess: 0, processingProgress: undefined, + retryOption: '', + retryOptionStatus: false, }; const copiedFilesData: CustomFile[] = [...filesData]; - - f.forEach((file) => { + for (let index = 0; index < f.length; index++) { + const file = f[index]; const filedataIndex = 
copiedFilesData.findIndex((filedataitem) => filedataitem?.name === file?.name); if (filedataIndex == -1) { copiedFilesData.unshift({ @@ -223,21 +210,13 @@ export default function DropZoneForSmallLayouts() { processingProgress: defaultValues.processingProgress, }); } - }); + } setFilesData(copiedFilesData); } }; console.log(acceptedFiles); return ( <> - {alertDetails.showAlert && ( - - )}
{isLoading ? : } diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 23310eaf4..aa4d9bb9a 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -25,19 +25,17 @@ import { import { useFileContext } from '../context/UsersFiles'; import { getSourceNodes } from '../services/GetFiles'; import { v4 as uuidv4 } from 'uuid'; -import { statusCheck, capitalize } from '../utils/Utils'; import { - SourceNode, - CustomFile, - FileTableProps, - UserCredentials, - statusupdate, - alertStateType, - ChildRef, -} from '../types'; + statusCheck, + capitalize, + isFileCompleted, + calculateProcessedCount, + getFileSourceStatus, + isProcessingFileValid, +} from '../utils/Utils'; +import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; -import { MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; -import CustomAlert from './UI/Alert'; +import { ArrowPathIconSolid, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; @@ -48,9 +46,10 @@ import cancelAPI from '../services/CancelAPI'; import IconButtonWithToolTip from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import IndeterminateCheckbox from './UI/CustomCheckBox'; +import { showErrorToast, showNormalToast } from '../utils/toasts'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, 
queue } = useFileContext(); const { userCredentials } = useCredentials(); @@ -62,29 +61,15 @@ const FileTable = forwardRef((props, ref) => { const [fileSourceFilter, setFileSourceFilter] = useState(''); const [llmtypeFilter, setLLmtypeFilter] = useState(''); const skipPageResetRef = useRef(false); - const [alertDetails, setalertDetails] = useState({ - showAlert: false, - alertType: 'error', - alertMessage: '', - }); const tableRef = useRef(null); const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { - setalertDetails({ - showAlert: true, - alertType: 'info', - alertMessage: `${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`, - }); - localStorage.setItem('alertShown', JSON.stringify(true)); + showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`); }, (fileName) => { - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: `${fileName} Failed to process`, - }); + showErrorToast(`${fileName} Failed to process`); } ); const columns = useMemo( @@ -117,7 +102,10 @@ const FileTable = forwardRef((props, ref) => { {...{ checked: row.getIsSelected(), disabled: - !row.getCanSelect() || row.original.status == 'Uploading' || row.original.status === 'Processing', + !row.getCanSelect() || + row.original.status == 'Uploading' || + row.original.status === 'Processing' || + row.original.status === 'Waiting', indeterminate: row.getIsSomeSelected(), onChange: row.getToggleSelectedHandler(), }} @@ -158,6 +146,22 @@ const FileTable = forwardRef((props, ref) => { > {info.getValue()} + {(info.getValue() === 'Completed' || + info.getValue() === 'Failed' || + info.getValue() === 'Cancelled') && ( + + onRetry(info?.row?.id as string)} + > + + + + )}
); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { @@ -218,11 +222,17 @@ const FileTable = forwardRef((props, ref) => {
); } + return ( +
+ + {info.getValue()} +
+ ); }, header: () => Status, footer: (info) => info.column.id, filterFn: 'statusFilter' as any, - size: 200, + size: 250, meta: { columnActions: { actions: [ @@ -556,6 +566,37 @@ const FileTable = forwardRef((props, ref) => { skipPageResetRef.current = false; }, [filesData.length]); + const handleFileUploadError = (error: AxiosError) => { + // @ts-ignore + const errorfile = decodeURI(error?.config?.url?.split('?')[0].split('/').at(-1)); + setProcessedCount((prev) => Math.max(prev - 1, 0)); + setFilesData((prevfiles) => + prevfiles.map((curfile) => (curfile.name === errorfile ? { ...curfile, status: 'Failed' } : curfile)) + ); + }; + + const handleSmallFile = (item: SourceNode, userCredentials: UserCredentials) => { + subscribe( + item.fileName, + userCredentials?.uri, + userCredentials?.userName, + userCredentials?.database, + userCredentials?.password, + updatestatus, + updateProgress + ).catch(handleFileUploadError); + }; + + const handleLargeFile = (item: SourceNode, userCredentials: UserCredentials) => { + triggerStatusUpdateAPI( + item.fileName, + userCredentials.uri, + userCredentials.userName, + userCredentials.password, + userCredentials.database, + updateStatusForLargeFiles + ); + }; useEffect(() => { const waitingQueue: CustomFile[] = JSON.parse( localStorage.getItem('waitingQueue') ?? 
JSON.stringify({ queue: [] }) @@ -577,10 +618,14 @@ const FileTable = forwardRef((props, ref) => { if (res.data.status !== 'Failed') { const prefiles: CustomFile[] = []; if (res.data.data.length) { - res.data.data.forEach((item: SourceNode) => { + res.data.data.forEach((item) => { if (item.fileName != undefined && item.fileName.length) { const waitingFile = waitingQueue.length && waitingQueue.find((f: CustomFile) => f.name === item.fileName); + if (isFileCompleted(waitingFile as CustomFile, item)) { + setProcessedCount((prev) => calculateProcessedCount(prev, batchSize)); + queue.remove(item.fileName); + } if (waitingFile && item.status === 'Completed') { setProcessedCount((prev) => { if (prev === batchSize) { @@ -599,21 +644,8 @@ const FileTable = forwardRef((props, ref) => { NodesCount: item?.nodeCount ?? 0, processing: item?.processingTime ?? 'None', relationshipCount: item?.relationshipCount ?? 0, - status: waitingFile - ? 'Waiting' - : item?.fileSource === 's3 bucket' && localStorage.getItem('accesskey') === item?.awsAccessKeyId - ? item?.status - : item?.fileSource === 'local file' - ? item?.status - : item?.status === 'Completed' || item.status === 'Failed' - ? item?.status - : item?.fileSource === 'Wikipedia' || - item?.fileSource === 'youtube' || - item?.fileSource === 'gcs bucket' || - item?.fileSource === 'web-url' - ? item?.status - : 'N/A', - model: waitingFile ? waitingFile.model : item?.model ?? model, + status: waitingFile ? 'Waiting' : getFileSourceStatus(item), + model: item?.model ?? model, id: !waitingFile ? uuidv4() : waitingFile.id, source_url: item?.url != 'None' && item?.url != '' ? item.url : '', fileSource: item?.fileSource ?? 'None', @@ -629,58 +661,17 @@ const FileTable = forwardRef((props, ref) => { !isNaN(Math.floor((item?.processed_chunk / item?.total_chunks) * 100)) ? Math.floor((item?.processed_chunk / item?.total_chunks) * 100) : undefined, - // total_pages: item?.total_pages ?? 0, access_token: item?.access_token ?? 
'', + retryOption: item.retry_condition ?? '', + retryOptionStatus: false, }); } }); res.data.data.forEach((item) => { - if ( - item.status === 'Processing' && - item.fileName != undefined && - userCredentials && - userCredentials.database - ) { - if (item?.fileSize < largeFileSize) { - subscribe( - item.fileName, - userCredentials?.uri, - userCredentials?.userName, - userCredentials?.database, - userCredentials?.password, - updatestatus, - updateProgress - ).catch((error: AxiosError) => { - // @ts-ignore - const errorfile = decodeURI(error?.config?.url?.split('?')[0].split('/').at(-1)); - setProcessedCount((prev) => { - if (prev == batchSize) { - return batchSize - 1; - } - return prev + 1; - }); - setFilesData((prevfiles) => { - return prevfiles.map((curfile) => { - if (curfile.name == errorfile) { - return { - ...curfile, - status: 'Failed', - }; - } - return curfile; - }); - }); - }); - } else { - triggerStatusUpdateAPI( - item.fileName, - userCredentials.uri, - userCredentials.userName, - userCredentials.password, - userCredentials.database, - updateStatusForLargeFiles - ); - } + if (isProcessingFileValid(item, userCredentials as UserCredentials)) { + item.fileSize < largeFileSize + ? 
handleSmallFile(item, userCredentials as UserCredentials) + : handleLargeFile(item, userCredentials as UserCredentials); } }); } else { @@ -693,11 +684,10 @@ const FileTable = forwardRef((props, ref) => { } setIsLoading(false); } catch (error: any) { - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: error.message, - }); + if (error instanceof Error) { + showErrorToast(error.message); + } + setIsLoading(false); setConnectionStatus(false); setFilesData([]); @@ -717,11 +707,7 @@ const FileTable = forwardRef((props, ref) => { if (processingFilesCount === 1) { setProcessedCount(1); } - setalertDetails({ - showAlert: true, - alertType: 'info', - alertMessage: `Files are in processing please wait till previous batch completes`, - }); + showNormalToast(`Files are in processing please wait till previous batch completes`); } else { const waitingQueue: CustomFile[] = JSON.parse( localStorage.getItem('waitingQueue') ?? JSON.stringify({ queue: [] }) @@ -788,11 +774,7 @@ const FileTable = forwardRef((props, ref) => { const error = JSON.parse(err.message); if (Object.keys(error).includes('fileName')) { const { message } = error; - setalertDetails({ - showAlert: true, - alertType: 'error', - alertMessage: message, - }); + showErrorToast(message); } } } @@ -870,11 +852,12 @@ const FileTable = forwardRef((props, ref) => { // Component has content, calculate maximum height for table // Observes the height of the content and calculates own height accordingly const resizeObserver = new ResizeObserver((entries) => { - entries.forEach((entry) => { + for (let index = 0; index < entries.length; index++) { + const entry = entries[index]; const { height } = entry.contentRect; const rowHeight = document?.getElementsByClassName('ndl-data-grid-td')?.[0]?.clientHeight ?? 
69; table.setPageSize(Math.floor(height / rowHeight)); - }); + } }); const [contentElement] = document.getElementsByClassName('ndl-data-grid-scrollable'); @@ -889,24 +872,12 @@ const FileTable = forwardRef((props, ref) => { const classNameCheck = isExpanded ? 'fileTableWithExpansion' : `filetable`; - const handleClose = () => { - setalertDetails((prev) => ({ ...prev, showAlert: false })); - localStorage.setItem('alertShown', JSON.stringify(true)); - }; useEffect(() => { setSelectedRows(table.getSelectedRowModel().rows.map((i) => i.id)); }, [table.getSelectedRowModel()]); return ( <> - {alertDetails.showAlert && ( - - )} {filesData ? ( <>
diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 8361ad3cd..c2c21116c 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -6,13 +6,11 @@ import Content from '../Content'; import SettingsModal from '../Popups/Settings/SettingModal'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; -import { UserCredentials, alertStateType } from '../../types'; +import { UserCredentials } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; -import { AlertColor, AlertPropsColorOverrides, useMediaQuery } from '@mui/material'; -import { OverridableStringUnion } from '@mui/types'; +import { useMediaQuery } from '@mui/material'; import { useFileContext } from '../../context/UsersFiles'; import SchemaFromTextDialog from '../Popups/Settings/SchemaFromText'; -import CustomAlert from '../UI/Alert'; export default function PageLayoutNew({ isSettingPanelExpanded, @@ -48,11 +46,7 @@ export default function PageLayoutNew({ setIsRightExpanded(false); } }; - const [alertDetails, setalertDetails] = useState({ - showAlert: false, - alertType: 'error', - alertMessage: '', - }); + const { messages } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); @@ -72,30 +66,8 @@ export default function PageLayoutNew({ } }; - const showAlert = ( - alertmsg: string, - alerttype: OverridableStringUnion | undefined - ) => { - setalertDetails({ - showAlert: true, - alertMessage: alertmsg, - alertType: alerttype, - }); - }; - const handleClose = () => { - setalertDetails((prev) => ({ ...prev, showAlert: false, alertMessage: '' })); - }; - return (
- {alertDetails.showAlert && ( - - )} { diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 373007e95..24de576c5 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -179,9 +179,10 @@ export default function DeletePopUpForOrphanNodes({ await deleteHandler(table.getSelectedRowModel().rows.map((r) => r.id)); const selectedRows = table.getSelectedRowModel().rows.map((r) => r.id); setTotalOrphanNodes((prev) => prev - selectedRows.length); - selectedRows.forEach((eid: string) => { + for (let index = 0; index < selectedRows.length; index++) { + const eid: string = selectedRows[index]; setOrphanNodes((prev) => prev.filter((node) => node.e.elementId != eid)); - }); + } setshowDeletePopUp(false); if (totalOrphanNodes) { await fetchOrphanNodes(); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index cca1a09a3..37f283d2a 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -5,11 +5,11 @@ import { Dropdown, Flex, Typography, useMediaQuery } from '@neo4j-ndl/react'; import { useCredentials } from '../../../../context/UserCredentials'; import { useFileContext } from '../../../../context/UsersFiles'; import { OnChangeValue, ActionMeta } from 'react-select'; -import { OptionType, OptionTypeForExamples, schema, UserCredentials } from '../../../../types'; -import { useAlertContext } from '../../../../context/Alert'; +import { OptionType, schema, UserCredentials } from 
'../../../../types'; import { getNodeLabelsAndRelTypes } from '../../../../services/GetNodeLabelsRelTypes'; import schemaExamples from '../../../../assets/schemas.json'; import { tokens } from '@neo4j-ndl/base'; +import { showNormalToast } from '../../../../utils/toasts'; export default function EntityExtractionSetting({ view, @@ -82,9 +82,15 @@ export default function EntityExtractionSetting({ const nodesFromSchema = selectedOptions.map((s) => JSON.parse(s.value).nodelabels).flat(); const relationsFromSchema = selectedOptions.map((s) => JSON.parse(s.value).relationshipTypes).flat(); let nodeOptionsFromSchema: OptionType[] = []; - nodesFromSchema.forEach((n) => nodeOptionsFromSchema.push({ label: n, value: n })); + for (let index = 0; index < nodesFromSchema.length; index++) { + const n = nodesFromSchema[index]; + nodeOptionsFromSchema.push({ label: n, value: n }); + } let relationshipOptionsFromSchema: OptionType[] = []; - relationsFromSchema.forEach((r) => relationshipOptionsFromSchema.push({ label: r, value: r })); + for (let index = 0; index < relationsFromSchema.length; index++) { + const r = relationsFromSchema[index]; + relationshipOptionsFromSchema.push({ label: r, value: r }); + } setSelectedNodes((prev) => { const combinedData = [...prev, ...nodeOptionsFromSchema]; const uniqueLabels = new Set(); @@ -145,11 +151,9 @@ export default function EntityExtractionSetting({ const [relationshipTypeOptions, setrelationshipTypeOptions] = useState([]); const [defaultExamples, setdefaultExamples] = useState([]); - const { showAlert } = useAlertContext(); - useEffect(() => { - const parsedData = schemaExamples.reduce((accu: OptionTypeForExamples[], example) => { - const examplevalues: OptionTypeForExamples = { + const parsedData = schemaExamples.reduce((accu: OptionType[], example) => { + const examplevalues: OptionType = { label: example.schema, value: JSON.stringify({ nodelabels: example.labels, @@ -227,7 +231,7 @@ export default function EntityExtractionSetting({ 
JSON.stringify({ db: userCredentials?.uri, selectedOptions: [] }) ); localStorage.setItem('selectedSchemas', JSON.stringify({ db: userCredentials?.uri, selectedOptions: [] })); - showAlert('info', `Successfully Removed the Schema settings`); + showNormalToast(`Successfully Removed the Schema settings`); if (view === 'Dialog' && onClose != undefined) { onClose(); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index 84577c53c..dda797971 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -6,8 +6,6 @@ import deleteOrphanAPI from '../../../services/DeleteOrphanNodes'; import { UserCredentials } from '../../../types'; import { useCredentials } from '../../../context/UserCredentials'; import EntityExtractionSettings from './EnitityExtraction/EntityExtractionSetting'; -import { AlertColor, AlertPropsColorOverrides } from '@mui/material'; -import { OverridableStringUnion } from '@mui/types'; import { useFileContext } from '../../../context/UsersFiles'; import DeduplicationTab from './Deduplication'; import { tokens } from '@neo4j-ndl/base'; @@ -20,10 +18,6 @@ export default function GraphEnhancementDialog({ }: { open: boolean; onClose: () => void; - showAlert: ( - alertmsg: string, - alerttype: OverridableStringUnion | undefined - ) => void; closeSettingModal: () => void; }) { const { breakpoints } = tokens; diff --git a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx index 50dbf6095..8d04467ff 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx @@ -91,12 +91,13 @@ export default function ConfirmationDialog({ extractHandler(selectedRows); } else { const tobeProcessFiles: CustomFile[] = 
[]; - checked.forEach((id: string) => { + for (let index = 0; index < checked.length; index++) { + const id = checked[index]; const file = filesData.find((f) => f.id === id); if (file) { tobeProcessFiles.push(file); } - }); + } extractHandler(tobeProcessFiles); } setChecked([]); diff --git a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index 26c9cfcda..cc684156b 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -7,6 +7,7 @@ import { chunkSize } from '../../../utils/Constants'; import BellImage from '../../../assets/images/Stopwatch-blue.svg'; import AlertIcon from '../../Layout/AlertIcon'; import wikipedialogo from '../../../assets/images/wikipedia.svg'; +import wikipediadark from '../../../assets/images/wikipedia-darkmode.svg'; import youtubelogo from '../../../assets/images/youtube.svg'; import weblogo from '../../../assets/images/web.svg'; import webdarkmode from '../../../assets/images/web-darkmode.svg'; @@ -20,7 +21,7 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke const imageIcon: Record = useMemo( () => ({ - Wikipedia: wikipedialogo, + Wikipedia: colorMode === 'dark' ? 
wikipedialogo : wikipediadark, 'gcs bucket': gcslogo, youtube: youtubelogo, 's3 bucket': s3logo, diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx new file mode 100644 index 000000000..d7b160e8e --- /dev/null +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -0,0 +1,84 @@ +import { Banner, Dialog, Flex, Radio } from '@neo4j-ndl/react'; +import { RETRY_OPIONS } from '../../../utils/Constants'; +import { useFileContext } from '../../../context/UsersFiles'; +import { capitalize } from '../../../utils/Utils'; +import { BannerAlertProps } from '../../../types'; +import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; + +export default function RetryConfirmationDialog({ + open, + onClose, + fileId, + retryLoading, + retryHandler, + alertStatus, + onBannerClose, +}: { + open: boolean; + onClose: () => void; + fileId: string; + retryLoading: boolean; + alertStatus: BannerAlertProps; + retryHandler: (filename: string, retryoption: string) => void; + onBannerClose: () => void; +}) { + const { filesData, setFilesData } = useFileContext(); + const file = filesData.find((c) => c.id === fileId); + const RetryOptionsForFile = file?.status != 'Completed' ? RETRY_OPIONS : RETRY_OPIONS.slice(0, 2); + return ( + + Reprocess Options + + {alertStatus.showAlert && ( + + {alertStatus.alertMessage} + + )} + + {RetryOptionsForFile.map((o, i) => { + return ( + { + setFilesData((prev) => { + return prev.map((f) => { + return f.id === fileId ? 
{ ...f, retryOptionStatus: e.target.checked, retryOption: o } : f; + }); + }); + }} + name='retryoptions' + checked={o === file?.retryOption && file?.retryOptionStatus} + label={o + .split('_') + .map((s) => capitalize(s)) + .join(' ')} + onKeyDown={(e) => { + if (e.code === 'Enter' && file?.retryOption.length) { + retryHandler(file?.name as string, file?.retryOption as string); + } + }} + /> + ); + })} + + + + { + retryHandler(file?.name as string, file?.retryOption as string); + }} + size='large' + > + Continue + + + + + + ); +} diff --git a/frontend/src/components/Popups/Settings/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx index 58a605edc..8ebc15020 100644 --- a/frontend/src/components/Popups/Settings/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -3,24 +3,18 @@ import { useCallback, useState } from 'react'; import { getNodeLabelsAndRelTypesFromText } from '../../../services/SchemaFromTextAPI'; import { useCredentials } from '../../../context/UserCredentials'; import { useFileContext } from '../../../context/UsersFiles'; -import { AlertColor, AlertPropsColorOverrides } from '@mui/material'; -import { OverridableStringUnion } from '@mui/types'; import { buttonCaptions } from '../../../utils/Constants'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; +import { showNormalToast, showSuccessToast } from '../../../utils/toasts'; const SchemaFromTextDialog = ({ open, onClose, openSettingsDialog, - showAlert, }: { open: boolean; onClose: () => void; openSettingsDialog: () => void; - showAlert: ( - alertmsg: string, - alerttype: OverridableStringUnion | undefined - ) => void; }) => { const [userText, setUserText] = useState(''); const [loading, setloading] = useState(false); @@ -75,19 +69,15 @@ const SchemaFromTextDialog = ({ }); } if (response.data?.data?.relationshipTypes.length && response.data?.data?.labels.length) { - showAlert( - `Successfully Created 
${response.data?.data?.labels.length} Node labels and ${response.data?.data?.relationshipTypes.length} Relationship labels`, - 'success' + showSuccessToast( + `Successfully Created ${response.data?.data?.labels.length} Node labels and ${response.data?.data?.relationshipTypes.length} Relationship labels` ); } else if (response.data?.data?.relationshipTypes.length && !response.data?.data?.labels.length) { - showAlert( - `Successfully Created ${response.data?.data?.relationshipTypes.length} Relationship labels`, - 'success' - ); + showSuccessToast(`Successfully Created ${response.data?.data?.relationshipTypes.length} Relationship labels`); } else if (!response.data?.data?.relationshipTypes.length && response.data?.data?.labels.length) { - showAlert(`Successfully Created ${response.data?.data?.labels.length} Node labels`, 'success'); + showSuccessToast(`Successfully Created ${response.data?.data?.labels.length} Node labels`); } else { - showAlert(`Please give meaningfull text`, 'success'); + showNormalToast(`Please give meaningfull text`); } } else { throw new Error('Unable to create labels from '); diff --git a/frontend/src/components/Popups/Settings/SettingModal.tsx b/frontend/src/components/Popups/Settings/SettingModal.tsx index 504ad88d4..8a2e35beb 100644 --- a/frontend/src/components/Popups/Settings/SettingModal.tsx +++ b/frontend/src/components/Popups/Settings/SettingModal.tsx @@ -1,6 +1,6 @@ import { Dialog, Dropdown } from '@neo4j-ndl/react'; import { OnChangeValue, ActionMeta } from 'react-select'; -import { OptionType, OptionTypeForExamples, SettingsModalProps, UserCredentials, schema } from '../../../types'; +import { OptionType, SettingsModalProps, UserCredentials, schema } from '../../../types'; import { useFileContext } from '../../../context/UsersFiles'; import { getNodeLabelsAndRelTypes } from '../../../services/GetNodeLabelsRelTypes'; import { useCredentials } from '../../../context/UserCredentials'; @@ -63,9 +63,15 @@ const SettingsModal: React.FC = 
({ const nodesFromSchema = selectedOptions.map((s) => JSON.parse(s.value).nodelabels).flat(); const relationsFromSchema = selectedOptions.map((s) => JSON.parse(s.value).relationshipTypes).flat(); let nodeOptionsFromSchema: OptionType[] = []; - nodesFromSchema.forEach((n) => nodeOptionsFromSchema.push({ label: n, value: n })); + for (let index = 0; index < nodesFromSchema.length; index++) { + const n = nodesFromSchema[index]; + nodeOptionsFromSchema.push({ label: n, value: n }); + } let relationshipOptionsFromSchema: OptionType[] = []; - relationsFromSchema.forEach((r) => relationshipOptionsFromSchema.push({ label: r, value: r })); + for (let index = 0; index < relationsFromSchema.length; index++) { + const r = relationsFromSchema[index]; + relationshipOptionsFromSchema.push({ label: r, value: r }); + } setSelectedNodes((prev) => { const combinedData = [...prev, ...nodeOptionsFromSchema]; const uniqueLabels = new Set(); @@ -123,8 +129,8 @@ const SettingsModal: React.FC = ({ const { showAlert } = useAlertContext(); useEffect(() => { - const parsedData = schemaExamples.reduce((accu: OptionTypeForExamples[], example) => { - const examplevalues: OptionTypeForExamples = { + const parsedData = schemaExamples.reduce((accu: OptionType[], example) => { + const examplevalues: OptionType = { label: example.schema, value: JSON.stringify({ nodelabels: example.labels, diff --git a/frontend/src/components/WebSources/CustomSourceInput.tsx b/frontend/src/components/WebSources/CustomSourceInput.tsx index e03c4c185..5775e35a9 100644 --- a/frontend/src/components/WebSources/CustomSourceInput.tsx +++ b/frontend/src/components/WebSources/CustomSourceInput.tsx @@ -48,6 +48,11 @@ export default function CustomSourceInput({ onChange={onChangeHandler} errorText={!isValid && isFocused && 'Please Fill The Valid URL'} onPaste={onPasteHandler} + onKeyDown={(e) => { + if (e.code === 'Enter') { + submitHandler(value); + } + }} />
diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx index 602acb947..eae33e3d4 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -1,5 +1,5 @@ import React, { useCallback, useState } from 'react'; -import { CustomFile, CustomFileBase, ScanProps, UserCredentials, fileName } from '../types'; +import { CustomFile, CustomFileBase, ScanProps, UserCredentials } from '../types'; import { useFileContext } from '../context/UsersFiles'; import { useCredentials } from '../context/UserCredentials'; import { urlScanAPI } from '../services/URLScan'; @@ -55,6 +55,8 @@ export default function useSourceInput( model: model, fileSource: fileSource, processingProgress: undefined, + retryOption: '', + retryOptionStatus: false, }; if (url.trim() != '') { setIsValid(validator(url) && isFocused); @@ -105,37 +107,40 @@ export default function useSourceInput( } const copiedFilesData: CustomFile[] = [...filesData]; - apiResponse?.data?.file_name?.forEach((item: fileName) => { - const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item?.fileName); - if (filedataIndex == -1) { - const baseValues = { - name: item.fileName, - size: item.fileSize, - source_url: item.url, - id: uuidv4(), - language: item.language, - // total_pages: 1, - ...defaultValues, - }; - if (isWikiQuery) { - baseValues.wiki_query = item.fileName; + if (apiResponse?.data?.file_name?.length) { + for (let index = 0; index < apiResponse?.data?.file_name.length; index++) { + const item = apiResponse?.data?.file_name[index]; + const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item?.fileName); + if (filedataIndex == -1) { + const baseValues = { + name: item.fileName, + size: item.fileSize, + source_url: item.url, + id: uuidv4(), + language: item.language, + // total_pages: 1, + ...defaultValues, + }; + if (isWikiQuery) { + baseValues.wiki_query = item.fileName; + } + 
copiedFilesData.unshift(baseValues); + } else { + const tempFileData = copiedFilesData[filedataIndex]; + copiedFilesData.splice(filedataIndex, 1); + copiedFilesData.unshift({ + ...tempFileData, + status: defaultValues.status, + NodesCount: defaultValues.NodesCount, + relationshipCount: defaultValues.relationshipCount, + processing: defaultValues.processing, + model: defaultValues.model, + fileSource: defaultValues.fileSource, + processingProgress: defaultValues.processingProgress, + }); } - copiedFilesData.unshift(baseValues); - } else { - const tempFileData = copiedFilesData[filedataIndex]; - copiedFilesData.splice(filedataIndex, 1); - copiedFilesData.unshift({ - ...tempFileData, - status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, - model: defaultValues.model, - fileSource: defaultValues.fileSource, - processingProgress: defaultValues.processingProgress, - }); } - }); + } setFilesData(copiedFilesData); setInputVal(''); setIsValid(false); diff --git a/frontend/src/services/CommonAPI.ts b/frontend/src/services/CommonAPI.ts index bbae83032..b55d002fe 100644 --- a/frontend/src/services/CommonAPI.ts +++ b/frontend/src/services/CommonAPI.ts @@ -7,7 +7,7 @@ const apiCall = async ( url: string, method: Method, commonParams: UserCredentials, - additionalParams: FormDataParams + additionalParams: Partial ) => { try { const formData = new FormData(); diff --git a/frontend/src/services/retry.ts b/frontend/src/services/retry.ts new file mode 100644 index 000000000..6c1b6c28c --- /dev/null +++ b/frontend/src/services/retry.ts @@ -0,0 +1,23 @@ +import axios from 'axios'; +import { url } from '../utils/Utils'; +import { UserCredentials, commonserverresponse } from '../types'; + +const retry = async (userCredentials: UserCredentials, file: string, retryOption: string) => { + try { + const formData = new FormData(); + + formData.append('uri', userCredentials?.uri ?? 
''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + + formData.append('file_name', file); + formData.append('retry_condition', retryOption); + const response = await axios.post(`${url()}/retry_processing`, formData); + return response; + } catch (error) { + console.log('Error Posting the Question:', error); + throw error; + } +}; +export default retry; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 2802a2d6b..d16f56fa1 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -4,6 +4,7 @@ import React, { Dispatch, ReactNode, SetStateAction } from 'react'; import { OverridableStringUnion } from '@mui/types'; import type { Node, Relationship } from '@neo4j-nvl/base'; import { NonOAuthError } from '@react-oauth/google'; +import { BannerType } from '@neo4j-ndl/react'; export interface CustomFileBase extends Partial { processing: number | string; @@ -24,10 +25,11 @@ export interface CustomFileBase extends Partial { processingProgress?: number; access_token?: string; checked?: boolean; + retryOptionStatus: boolean; + retryOption: string; } export interface CustomFile extends CustomFileBase { id: string; - // total_pages: number | 'N/A'; } export interface OptionType { @@ -35,11 +37,6 @@ export interface OptionType { readonly label: string; } -export interface OptionTypeForExamples { - readonly value: string; - readonly label: string; -} - export type UserCredentials = { uri: string; userName: string; @@ -47,14 +44,27 @@ export type UserCredentials = { database: string; } & { [key: string]: any }; -export type ExtractParams = { +export interface SourceNode extends Omit { + fileName: string; + fileSize: number; + fileType: string; + nodeCount?: number; + processingTime?: string; + relationshipCount?: number; + url?: string; + awsAccessKeyId?: string; + uploadprogress?: number; + gcsProjectId?: string; + 
processed_chunk?: number; + total_chunks?: number; + retry_condition?: string; +} + +export type ExtractParams = Pick & { file?: File; - model: string; - source_url?: string; aws_access_key_id?: string | null; aws_secret_access_key?: string | null; max_sources?: number; - wiki_query?: string; gcs_bucket_name?: string; gcs_bucket_folder?: string; gcs_blob_filename?: string; @@ -63,8 +73,7 @@ export type ExtractParams = { allowedNodes?: string[]; allowedRelationship?: string[]; gcs_project_id?: string; - language?: string; - access_token?: string; + retry_condition: string; } & { [key: string]: any }; export type UploadParams = { @@ -96,36 +105,10 @@ export interface S3ModalProps { hideModal: () => void; open: boolean; } -export interface GCSModalProps { - hideModal: () => void; - open: boolean; +export interface GCSModalProps extends Omit { openGCSModal: () => void; } -export interface SourceNode { - fileName: string; - fileSize: number; - fileType: string; - nodeCount?: number; - processingTime?: string; - relationshipCount?: number; - model: string; - status: string; - url?: string; - awsAccessKeyId?: string; - fileSource: string; - gcsBucket?: string; - gcsBucketFolder?: string; - errorMessage?: string; - uploadprogress?: number; - gcsProjectId?: string; - language?: string; - processed_chunk?: number; - total_chunks?: number; - // total_pages?: number; - access_token?: string; -} - export interface SideNavProps { isExpanded: boolean; position: 'left' | 'right'; @@ -171,6 +154,7 @@ export interface FileTableProps { setConnectionStatus: Dispatch>; onInspect: (id: string) => void; handleGenerateGraph: () => void; + onRetry: (id: string) => void; } export interface CustomModalProps { @@ -249,10 +233,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes { - hideModal: () => void; - open: boolean; -} +export interface WikipediaModalTypes extends Omit {} export interface GraphViewModalProps { open: 
boolean; @@ -341,7 +322,9 @@ export type alertStateType = { alertType: OverridableStringUnion | undefined; alertMessage: string; }; - +export interface BannerAlertProps extends Omit { + alertType: BannerType; +} export type Scheme = Record; export type LabelCount = Record; @@ -437,19 +420,11 @@ export interface chatInfoMessage extends Partial { error: string; } -export interface eventResponsetypes { - fileName: string; - status: string; - processingTime: number; - nodeCount: number; - relationshipCount: number; - model: string; +export interface eventResponsetypes extends Omit { total_chunks: number | null; - // total_pages: number; - fileSize: number; - processed_chunk?: number; - fileSource: string; + processingTime: number; } + export type Nullable = Type | null; export type LabelColors = 'default' | 'success' | 'info' | 'warning' | 'danger' | undefined; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 47f1e119f..114b20fa0 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -185,7 +185,11 @@ export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ performing similarity-based searches.`, }, ]; - +export const RETRY_OPIONS = [ + 'start_from_beginning', + 'delete_entities_and_start_from_beginning', + 'start_from_last_processed_position', +]; export const batchSize: number = parseInt(process.env.VITE_BATCH_SIZE ?? 
'2'); export const nvlOptions: NvlOptions = { diff --git a/frontend/src/utils/FileAPI.ts b/frontend/src/utils/FileAPI.ts index b6dcabeba..2d248260f 100644 --- a/frontend/src/utils/FileAPI.ts +++ b/frontend/src/utils/FileAPI.ts @@ -25,6 +25,7 @@ export const extractAPI = async ( model: string, userCredentials: UserCredentials, source_type: string, + retry_condition: string, source_url?: string, aws_access_key_id?: string | null, aws_secret_access_key?: string | null, @@ -51,6 +52,7 @@ export const extractAPI = async ( file_name, allowedNodes, allowedRelationship, + retry_condition, }; } else if (source_type === 'Wikipedia') { additionalParams = { @@ -61,6 +63,7 @@ export const extractAPI = async ( allowedNodes, allowedRelationship, language, + retry_condition, }; } else if (source_type === 'gcs bucket') { additionalParams = { @@ -74,6 +77,7 @@ export const extractAPI = async ( allowedRelationship, gcs_project_id, access_token, + retry_condition, }; } else if (source_type === 'youtube') { additionalParams = { @@ -83,6 +87,7 @@ export const extractAPI = async ( file_name, allowedNodes, allowedRelationship, + retry_condition, }; } else if (source_type === 'web-url') { additionalParams = { @@ -92,6 +97,7 @@ export const extractAPI = async ( file_name, allowedNodes, allowedRelationship, + retry_condition, }; } else { additionalParams = { @@ -100,6 +106,7 @@ export const extractAPI = async ( file_name, allowedNodes, allowedRelationship, + retry_condition, }; } const response = await apiCall(urlExtract, method, commonParams, additionalParams); diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 0b8b05841..8b49c7e7d 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -1,6 +1,16 @@ import { calcWordColor } from '@neo4j-devtools/word-color'; import type { Relationship } from '@neo4j-nvl/base'; -import { Entity, ExtendedNode, ExtendedRelationship, GraphType, Messages, Scheme } from '../types'; +import { + CustomFile, + Entity, 
+ ExtendedNode, + ExtendedRelationship, + GraphType, + Messages, + Scheme, + SourceNode, + UserCredentials, +} from '../types'; // Get the Url export const url = () => { @@ -48,6 +58,10 @@ export const statusCheck = (status: string) => { return 'warning'; case 'Failed': return 'danger'; + case 'Upload Failed': + return 'danger'; + case 'Reprocess': + return 'info'; default: return 'unknown'; } @@ -134,12 +148,13 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela const schemeVal: Scheme = {}; let iterator = 0; const labels: string[] = neoNodes.map((f: any) => f.labels); - labels.forEach((label: any) => { + for (let index = 0; index < labels.length; index++) { + const label = labels[index]; if (schemeVal[label] == undefined) { schemeVal[label] = calcWordColor(label[0]); iterator += 1; } - }); + } const newNodes: ExtendedNode[] = neoNodes.map((g: any) => { return { id: g.element_id, @@ -240,6 +255,35 @@ export const titleCheck = (title: string) => { return title === 'Chunk' || title === 'Document'; }; +export const getFileSourceStatus = (item: SourceNode) => { + if (item?.fileSource === 's3 bucket' && localStorage.getItem('accesskey') === item?.awsAccessKeyId) { + return item?.status; + } + if (item?.fileSource === 'local file') { + return item?.status; + } + if (item?.status === 'Completed' || item.status === 'Failed' || item.status === 'Reprocess') { + return item?.status; + } + if ( + item?.fileSource === 'Wikipedia' || + item?.fileSource === 'youtube' || + item?.fileSource === 'gcs bucket' || + item?.fileSource === 'web-url' + ) { + return item?.status; + } + return 'N/A'; +}; +export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => + waitingFile && item.status === 'Completed'; + +export const calculateProcessedCount = (prev: number, batchSize: number) => + (prev === batchSize ? 
batchSize - 1 : prev + 1); + +export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { + return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; +}; export const sortAlphabetically = (a: Relationship, b: Relationship) => { const captionOne = a.caption?.toLowerCase() || ''; const captionTwo = b.caption?.toLowerCase() || ''; diff --git a/frontend/src/utils/toasts.ts b/frontend/src/utils/toasts.ts new file mode 100644 index 000000000..832c33b9a --- /dev/null +++ b/frontend/src/utils/toasts.ts @@ -0,0 +1,22 @@ +import { toast } from '@neo4j-ndl/react'; + +export const showErrorToast = (message: string, shouldAutoClose: boolean = true) => { + return toast.danger(message, { + isCloseable: true, + shouldAutoClose: shouldAutoClose, + }); +}; + +export const showSuccessToast = (message: string) => { + return toast.success(message, { + isCloseable: true, + shouldAutoClose: true, + }); +}; + +export const showNormalToast = (message: string) => { + return toast.neutral(message, { + isCloseable: true, + shouldAutoClose: true, + }); +}; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 3915845ef..977740e94 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -1862,6 +1862,7 @@ dependencies: "@react-types/shared" "^3.24.1" + "@react-types/combobox@^3.12.1": version "3.12.1" resolved "https://registry.yarnpkg.com/@react-types/combobox/-/combobox-3.12.1.tgz#ab015d31c160aa0a21d696887ce81467c5996602" From 0cb7ed93e4ab94ac2785d668e3d13530e5b9e9f9 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:22:49 +0530 Subject: [PATCH 044/292] ref added for keydown (#717) * ref addded for keydown * enter key changes * tooltip changes * ishover state * theme fix * removal of handleKeydown * format fix --- .../ConnectionModal/ConnectionModal.tsx | 124 ++++++++++++------ .../Deduplication/index.tsx | 4 +- 
.../EntityExtractionSetting.tsx | 2 +- .../Popups/GraphEnhancementDialog/index.tsx | 2 +- .../src/components/UI/ButtonWithToolTip.tsx | 15 ++- .../src/components/UI/IconButtonToolTip.tsx | 12 +- frontend/src/context/ThemeWrapper.tsx | 26 ++-- 7 files changed, 121 insertions(+), 64 deletions(-) diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index ab9fe2399..8274e626a 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -1,5 +1,5 @@ import { Button, Dialog, TextInput, Dropdown, Banner, Dropzone, Typography, TextLink, Flex } from '@neo4j-ndl/react'; -import { useCallback, useEffect, useMemo, useState } from 'react'; +import React, { useCallback, useEffect, useMemo, useState, useRef } from 'react'; import connectAPI from '../../../services/ConnectAPI'; import { useCredentials } from '../../../context/UserCredentials'; import { useSearchParams } from 'react-router-dom'; @@ -46,6 +46,11 @@ export default function ConnectionModal({ const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); const [vectorIndexLoading, setVectorIndexLoading] = useState(false); + const connectRef = useRef(null); + const uriRef = useRef(null); + const databaseRef = useRef(null); + const userNameRef = useRef(null); + const passwordRef = useRef(null); useEffect(() => { if (searchParams.has('connectURL')) { @@ -274,6 +279,24 @@ export default function ConnectionModal({ setMessage({ type: 'unknown', content: '' }); }, []); + const handleKeyPress = (e: React.KeyboardEvent, nextRef?: React.RefObject) => { + if (e.code === 'Enter') { + e.preventDefault(); + // @ts-ignore + const { form } = e.target; + if (form) { + const index = Array.prototype.indexOf.call(form, e.target); + if (index + 1 < form.elements.length) { + form.elements[index + 1].focus(); + } else { + submitConnection(); + } + } else { + nextRef?.current?.focus(); + } + } + }; + const isDisabled = useMemo(() => !username || !URI || !password, [username, URI, password]); return ( @@ -348,6 +371,7 @@ export default function ConnectionModal({ />
setURI(e.target.value)} onPaste={(e) => handleHostPasteChange(e)} aria-label='Connection URI' + onKeyDown={(e) => handleKeyPress(e, databaseRef)} />
- setDatabase(e.target.value)} - className='w-full' - /> -
-
- setUsername(e.target.value)} - /> -
-
- setPassword(e.target.value)} - /> +
+ setDatabase(e.target.value)} + className='w-full' + onKeyDown={handleKeyPress} + /> +
+
+ setUsername(e.target.value)} + onKeyDown={handleKeyPress} + /> +
+
+ setPassword(e.target.value)} + onKeyDown={handleKeyPress} + /> +
-
+ - diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 36fcff3d1..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 37f283d2a..5bbd4607a 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -312,7 +312,7 @@ export default function EntityExtractionSetting({ type='creatable' /> - + { + const [isHovered, setIsHovered] = useState(false); return ( - + - - {text} - + {isHovered && ( + + {text} + + )} ); }; diff --git a/frontend/src/components/UI/IconButtonToolTip.tsx b/frontend/src/components/UI/IconButtonToolTip.tsx index 62f438005..9622fd8e2 100644 --- a/frontend/src/components/UI/IconButtonToolTip.tsx +++ b/frontend/src/components/UI/IconButtonToolTip.tsx @@ -1,4 +1,5 @@ import { IconButton, Tip } from '@neo4j-ndl/react'; +import { useState } from 'react'; const IconButtonWithToolTip = ({ text, @@ -21,6 +22,7 @@ const IconButtonWithToolTip = ({ placement?: 'bottom' | 'top' | 'right' | 'left'; disabled?: boolean; }) => { + const [isHovered, setIsHovered] = useState(false); return ( @@ -31,13 +33,17 @@ 
const IconButtonWithToolTip = ({ grouped={grouped} onClick={onClick} disabled={disabled} + onMouseEnter={() => setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} > {children} - - {text} - + {isHovered && ( + + {text} + + )} ); }; diff --git a/frontend/src/context/ThemeWrapper.tsx b/frontend/src/context/ThemeWrapper.tsx index 110e8638b..963cbe3b8 100644 --- a/frontend/src/context/ThemeWrapper.tsx +++ b/frontend/src/context/ThemeWrapper.tsx @@ -1,36 +1,39 @@ -import { ReactNode, createContext, useMemo, useState } from 'react'; +import { ReactNode, createContext, useMemo, useState, useEffect } from 'react'; import { NeedleThemeProvider, useMediaQuery } from '@neo4j-ndl/react'; - export const ThemeWrapperContext = createContext({ toggleColorMode: () => {}, colorMode: localStorage.getItem('mode') as 'light' | 'dark', }); - interface ThemeWrapperProps { children: ReactNode; } const ThemeWrapper = ({ children }: ThemeWrapperProps) => { const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)'); - // @ts-ignore - const defaultMode: 'light' | 'dark' = localStorage.getItem('mode'); - const [mode, setMode] = useState<'light' | 'dark'>(prefersDarkMode ? 'dark' : defaultMode ?? 'light'); - const [usingPreferredMode, setUsingPreferredMode] = useState(true); + const defaultMode = localStorage.getItem('mode') as 'light' | 'dark'; + const [mode, setMode] = useState<'light' | 'dark'>(defaultMode ?? (prefersDarkMode ? 'dark' : 'light')); + const [usingPreferredMode, setUsingPreferredMode] = useState(!defaultMode); + + useEffect(() => { + // Ensure the body class is updated on initial load + themeBodyInjection(mode); + }, [mode]); const themeWrapperUtils = useMemo( () => ({ colorMode: mode, toggleColorMode: () => { setMode((prevMode) => { + const newMode = prevMode === 'light' ? 'dark' : 'light'; setUsingPreferredMode(false); - localStorage.setItem('mode', prevMode === 'light' ? 'dark' : 'light'); - themeBodyInjection(prevMode); - return prevMode === 'light' ? 
'dark' : 'light'; + localStorage.setItem('mode', newMode); + themeBodyInjection(newMode); + return newMode; }); }, }), [mode] ); const themeBodyInjection = (mode: string) => { - if (mode === 'light') { + if (mode === 'dark') { document.body.classList.add('ndl-theme-dark'); } else { document.body.classList.remove('ndl-theme-dark'); @@ -40,7 +43,6 @@ const ThemeWrapper = ({ children }: ThemeWrapperProps) => { if (usingPreferredMode) { prefersDarkMode ? themeBodyInjection('light') : themeBodyInjection('dark'); } - return ( From 18b0093f8bde824b7fe6d8d1d21c91b0d24a72b5 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:24:32 +0530 Subject: [PATCH 045/292] Remove total_pages propert. It is not used in DB. (#714) Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> --- backend/score.py | 2 -- backend/src/document_sources/gcs_bucket.py | 1 - backend/src/document_sources/local_file.py | 16 ++++++++-------- backend/src/entities/source_node.py | 1 - backend/src/graphDB_dataAccess.py | 9 +++------ backend/src/main.py | 8 +------- docs/backend/backend_docs.adoc | 1 - 7 files changed, 12 insertions(+), 26 deletions(-) diff --git a/backend/score.py b/backend/score.py index 08ea9a6b2..6d2cd79a1 100644 --- a/backend/score.py +++ b/backend/score.py @@ -445,7 +445,6 @@ async def generate(): 'relationshipCount':result[0]['relationshipCount'], 'model':result[0]['model'], 'total_chunks':result[0]['total_chunks'], - 'total_pages':result[0]['total_pages'], 'fileSize':result[0]['fileSize'], 'processed_chunk':result[0]['processed_chunk'], 'fileSource':result[0]['fileSource'] @@ -504,7 +503,6 @@ async def get_document_status(file_name, url, userName, password, database): 'relationshipCount':result[0]['relationshipCount'], 'model':result[0]['model'], 'total_chunks':result[0]['total_chunks'], - 'total_pages':result[0]['total_pages'], 'fileSize':result[0]['fileSize'], 
'processed_chunk':result[0]['processed_chunk'], 'fileSource':result[0]['fileSource'] diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py index 4a5909fc4..91830f591 100644 --- a/backend/src/document_sources/gcs_bucket.py +++ b/backend/src/document_sources/gcs_bucket.py @@ -122,7 +122,6 @@ def merge_file_gcs(bucket_name, original_file_name: str, folder_name_sha1_hashed blob.upload_from_file(file_io) # pdf_reader = PdfReader(file_io) file_size = len(merged_file) - # total_pages = len(pdf_reader.pages) return file_size except Exception as e: diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py index 9fb31649f..ed46210f4 100644 --- a/backend/src/document_sources/local_file.py +++ b/backend/src/document_sources/local_file.py @@ -56,19 +56,19 @@ def get_pages_with_page_numbers(unstructured_pages): if page.metadata['page_number']==page_number: page_content += page.page_content metadata = {'source':page.metadata['source'],'page_number':page_number, 'filename':page.metadata['filename'], - 'filetype':page.metadata['filetype'], 'total_pages':unstructured_pages[-1].metadata['page_number']} + 'filetype':page.metadata['filetype']} if page.metadata['page_number']>page_number: page_number+=1 - if not metadata: - metadata = {'total_pages':unstructured_pages[-1].metadata['page_number']} - pages.append(Document(page_content = page_content, metadata=metadata)) + # if not metadata: + # metadata = {'total_pages':unstructured_pages[-1].metadata['page_number']} + pages.append(Document(page_content = page_content)) page_content='' if page == unstructured_pages[-1]: - if not metadata: - metadata = {'total_pages':unstructured_pages[-1].metadata['page_number']} - pages.append(Document(page_content = page_content, metadata=metadata)) + # if not metadata: + # metadata = {'total_pages':unstructured_pages[-1].metadata['page_number']} + pages.append(Document(page_content = page_content)) elif 
page.metadata['category']=='PageBreak' and page!=unstructured_pages[0]: page_number+=1 @@ -80,7 +80,7 @@ def get_pages_with_page_numbers(unstructured_pages): page_content += page.page_content metadata_with_custom_page_number = {'source':page.metadata['source'], 'page_number':1, 'filename':page.metadata['filename'], - 'filetype':page.metadata['filetype'], 'total_pages':1} + 'filetype':page.metadata['filetype']} if page == unstructured_pages[-1]: pages.append(Document(page_content = page_content, metadata=metadata_with_custom_page_number)) return pages \ No newline at end of file diff --git a/backend/src/entities/source_node.py b/backend/src/entities/source_node.py index 1a1c70487..a162b6d04 100644 --- a/backend/src/entities/source_node.py +++ b/backend/src/entities/source_node.py @@ -18,7 +18,6 @@ class sourceNode: updated_at:datetime=None processing_time:float=None error_message:str=None - total_pages:int=None total_chunks:int=None language:str=None is_cancelled:bool=None diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index c1eaaa190..36a9a2925 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -37,14 +37,14 @@ def create_source_node(self, obj_source_node:sourceNode): d.processingTime = $pt, d.errorMessage = $e_message, d.nodeCount= $n_count, d.relationshipCount = $r_count, d.model= $model, d.gcsBucket=$gcs_bucket, d.gcsBucketFolder= $gcs_bucket_folder, d.language= $language,d.gcsProjectId= $gcs_project_id, - d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0, d.total_pages=$total_pages, + d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0, d.access_token=$access_token""", {"fn":obj_source_node.file_name, "fs":obj_source_node.file_size, "ft":obj_source_node.file_type, "st":job_status, "url":obj_source_node.url, "awsacc_key_id":obj_source_node.awsAccessKeyId, "f_source":obj_source_node.file_source, "c_at":obj_source_node.created_at, "u_at":obj_source_node.created_at, "pt":0, 
"e_message":'', "n_count":0, "r_count":0, "model":obj_source_node.model, "gcs_bucket": obj_source_node.gcsBucket, "gcs_bucket_folder": obj_source_node.gcsBucketFolder, - "language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId, "total_pages": obj_source_node.total_pages, + "language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId, "access_token":obj_source_node.access_token}) except Exception as e: error_message = str(e) @@ -80,9 +80,6 @@ def update_source_node(self, obj_source_node:sourceNode): if obj_source_node.model is not None and obj_source_node.model != '': params['model'] = obj_source_node.model - if obj_source_node.total_pages is not None and obj_source_node.total_pages != 0: - params['total_pages'] = obj_source_node.total_pages - if obj_source_node.total_chunks is not None and obj_source_node.total_chunks != 0: params['total_chunks'] = obj_source_node.total_chunks @@ -190,7 +187,7 @@ def get_current_status_document_node(self, file_name): query = """ MATCH(d:Document {fileName : $file_name}) RETURN d.status AS Status , d.processingTime AS processingTime, d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount, - d.total_pages AS total_pages, d.total_chunks AS total_chunks , d.fileSize as fileSize, + d.total_chunks AS total_chunks , d.fileSize as fileSize, d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource """ param = {"file_name" : file_name} diff --git a/backend/src/main.py b/backend/src/main.py index 3a88f1eb3..caf5d58dc 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -52,7 +52,6 @@ def create_source_node_graph_url_s3(graph, model, source_url, aws_access_key_id, obj_source_node.file_type = 'pdf' obj_source_node.file_size = file_info['file_size_bytes'] obj_source_node.file_source = source_type - obj_source_node.total_pages = 'N/A' obj_source_node.model = model obj_source_node.url = str(source_url+file_name) 
obj_source_node.awsAccessKeyId = aws_access_key_id @@ -82,7 +81,6 @@ def create_source_node_graph_url_gcs(graph, model, gcs_project_id, gcs_bucket_na obj_source_node.file_size = file_metadata['fileSize'] obj_source_node.url = file_metadata['url'] obj_source_node.file_source = source_type - obj_source_node.total_pages = 'N/A' obj_source_node.model = model obj_source_node.file_type = 'pdf' obj_source_node.gcsBucket = gcs_bucket_name @@ -116,7 +114,6 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type): obj_source_node.file_type = 'text' obj_source_node.file_source = source_type obj_source_node.model = model - obj_source_node.total_pages = 1 obj_source_node.url = urllib.parse.unquote(source_url) obj_source_node.created_at = datetime.now() obj_source_node.file_name = pages[0].metadata['title'] @@ -139,7 +136,6 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.file_type = 'text' obj_source_node.file_source = source_type obj_source_node.model = model - obj_source_node.total_pages = 1 obj_source_node.url = youtube_url obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) @@ -177,7 +173,6 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type obj_source_node.file_type = 'text' obj_source_node.file_source = source_type obj_source_node.file_size = sys.getsizeof(pages[0].page_content) - obj_source_node.total_pages = len(pages) obj_source_node.model = model obj_source_node.url = urllib.parse.unquote(pages[0].metadata['source']) obj_source_node.created_at = datetime.now() @@ -289,8 +284,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages status = "Processing" obj_source_node.file_name = file_name obj_source_node.status = status - obj_source_node.total_chunks = total_chunks - obj_source_node.total_pages = len(pages) + obj_source_node.total_chunks = len(chunks) obj_source_node.model 
= model if retry_condition == START_FROM_LAST_PROCESSED_POSITION: node_count = result[0]['nodeCount'] diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index a2887d6f9..91b39260c 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -580,7 +580,6 @@ The API provides a continuous update on the extraction status of a specified fil "relationshipCount": 0, "model": "OpenAI GPT 3.5", "total_chunks": 3, - "total_pages": 1, "fileSize": 92373, "processed_chunk": 0 } From eb48190fcf2f99f18e0f167e21dc15dd060d0cea Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:16:03 +0530 Subject: [PATCH 046/292] Update main.py --- backend/src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index caf5d58dc..5eb8fd59d 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -284,7 +284,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages status = "Processing" obj_source_node.file_name = file_name obj_source_node.status = status - obj_source_node.total_chunks = len(chunks) + obj_source_node.total_chunks = total_chunks obj_source_node.model = model if retry_condition == START_FROM_LAST_PROCESSED_POSITION: node_count = result[0]['nodeCount'] @@ -623,4 +623,4 @@ def set_status_retry(graph, file_name, retry_condition): obj_source_node.node_count=0 obj_source_node.relationship_count=0 logging.info(obj_source_node) - graphDb_data_Access.update_source_node(obj_source_node) \ No newline at end of file + graphDb_data_Access.update_source_node(obj_source_node) From 5ff0014155888956778c0836eb183696d42f846d Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:51:14 +0000 Subject: [PATCH 047/292] Add print statement for document status --- backend/score.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/backend/score.py b/backend/score.py index 6d2cd79a1..15b381e83 100644 --- a/backend/score.py +++ b/backend/score.py @@ -437,7 +437,8 @@ async def generate(): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - if result is not None: + print(f'Result of document status in SSE : {result}') + if len(result) > 0: status = json.dumps({'fileName':file_name, 'status':result[0]['Status'], 'processingTime':result[0]['processingTime'], @@ -495,7 +496,7 @@ async def get_document_status(file_name, url, userName, password, database): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - if result is not None: + if len(result) > 0: status = {'fileName':file_name, 'status':result[0]['Status'], 'processingTime':result[0]['processingTime'], @@ -509,6 +510,7 @@ async def get_document_status(file_name, url, userName, password, database): } else: status = {'fileName':file_name, 'status':'Failed'} + print(f'Result of document status in refresh : {result}') return create_api_response('Success',message="",file_name=status) except Exception as e: message=f"Unable to get the document status" From 8215abd1a32ab2302024c6473d0e173510512b45 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:40:13 +0000 Subject: [PATCH 048/292] refactored qa integration --- backend/src/QA_integration.py | 342 ---------------- backend/src/QA_integration_new.py | 659 ++++++++++++++++++------------ backend/src/QA_optimization.py | 217 ---------- backend/src/shared/constants.py | 3 + 4 files changed, 408 insertions(+), 813 deletions(-) delete mode 100644 backend/src/QA_integration.py delete mode 100644 
backend/src/QA_optimization.py diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py deleted file mode 100644 index 6a70c3ec0..000000000 --- a/backend/src/QA_integration.py +++ /dev/null @@ -1,342 +0,0 @@ -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.chains import GraphCypherQAChain -from langchain.graphs import Neo4jGraph -import os - -from dotenv import load_dotenv -from langchain.chains import RetrievalQA -from langchain.chains import RetrievalQAWithSourcesChain -from langchain_openai import ChatOpenAI -from langchain_openai import OpenAIEmbeddings -from langchain_google_vertexai import VertexAIEmbeddings -from langchain_google_vertexai import ChatVertexAI -from langchain_google_vertexai import HarmBlockThreshold, HarmCategory -import logging -from langchain_community.chat_message_histories import Neo4jChatMessageHistory -from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings -from src.shared.common_fn import load_embedding_model -import re -from typing import Any -from datetime import datetime -import time - -load_dotenv() - -openai_api_key = os.environ.get('OPENAI_API_KEY') - -EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') -EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) -CHAT_MAX_TOKENS = 1000 - - -# RETRIEVAL_QUERY = """ -# WITH node, score, apoc.text.join([ (node)-[:__HAS_ENTITY__]->(e) | head(labels(e)) + ": "+ e.id],", ") as entities -# MATCH (node)-[:__PART_OF__]->(d:Document) -# WITH d, apoc.text.join(collect(node.text + "\n" + entities),"\n----\n") as text, avg(score) as score -# RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata -# """ - -RETRIEVAL_QUERY = """ -WITH node as chunk, score -MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -CALL { WITH chunk -MATCH (chunk)-[:__HAS_ENTITY__]->(e) -MATCH 
path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,3}(:!__Chunk__&!__Document__) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, avg(score) as score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -WITH d, score, -[c in chunks | c.text] as texts, -[r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities -WITH d, score, -apoc.text.join(texts,"\n----\n") + -apoc.text.join(entities,"\n") -as text, entities -RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), entities:entities} as metadata -""" - -FINAL_PROMPT = """ -You are an AI-powered question-answering agent tasked with providing accurate and direct responses to user queries. Utilize information from the chat history, current user input, and Relevant Information effectively. - -Response Requirements: -- Deliver concise and direct answers to the user's query without headers unless requested. -- Acknowledge and utilize relevant previous interactions based on the chat history summary. -- Respond to initial greetings appropriately, but avoid including a greeting in subsequent responses unless the chat is restarted or significantly paused. -- For non-general questions, strive to provide answers using chat history and Relevant Information ONLY do not Hallucinate. -- Clearly state if an answer is unknown; avoid speculating. - -Instructions: -- Prioritize directly answering the User Input: {question}. -- Use the Chat History Summary: {chat_summary} to provide context-aware responses. -- Refer to Relevant Information: {vector_result} only if it directly relates to the query. -- Cite sources clearly when using Relevant Information in your response [Sources: {sources}] without fail. 
The Source must be printed only at the last in the format [Source: source1,source2] . Duplicate sources should be removed. -Ensure that answers are straightforward and context-aware, focusing on being relevant and concise. -""" - -def get_llm(model: str,max_tokens=1000) -> Any: - """Retrieve the specified language model based on the model name.""" - - model_versions = { - "openai-gpt-3.5": "gpt-3.5-turbo-16k", - "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-preview-0409", - "openai-gpt-4": "gpt-4-0125-preview", - "diffbot" : "gpt-4-0125-preview", - "openai-gpt-4o":"gpt-4o", - "openai-gpt-4o-mini": "gpt-4o-mini", - } - if model in model_versions: - model_version = model_versions[model] - logging.info(f"Chat Model: {model}, Model Version: {model_version}") - - if "Gemini" in model: - llm = ChatVertexAI( - model_name=model_version, - convert_system_message_to_human=True, - max_tokens=max_tokens, - temperature=0, - safety_settings={ - HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE - } - ) - else: - llm = ChatOpenAI(model=model_version, temperature=0,max_tokens=max_tokens) - - return llm,model_version - - else: - logging.error(f"Unsupported model: {model}") - return None,None - -def vector_embed_results(qa,question): - vector_res={} - try: - result = qa({"query": question}) - vector_res['result']=result.get("result") - - sources = set() - entities = set() - for document in result["source_documents"]: - sources.add(document.metadata["source"]) - for entiti in document.metadata["entities"]: - entities.add(entiti) - vector_res['source']=list(sources) - vector_res['entities'] = list(entities) - if len( 
vector_res['entities']) > 5: - vector_res['entities'] = vector_res['entities'][:5] - - # list_source_docs=[] - # for i in result["source_documents"]: - # list_source_docs.append(i.metadata['source']) - # vector_res['source']=list_source_docs - - # result = qa({"question":question},return_only_outputs=True) - # vector_res['result'] = result.get("answer") - # vector_res["source"] = result.get("sources") - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in vector embedding in QA component:{error_message}') - # raise Exception(error_message) - - return vector_res - -def save_chat_history(history,user_message,ai_message): - try: - # history = Neo4jChatMessageHistory( - # graph=graph, - # session_id=session_id - # ) - history.add_user_message(user_message) - history.add_ai_message(ai_message) - logging.info(f'Successfully saved chat history') - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in saving chat history:{error_message}') - -def get_chat_history(llm, history): - """Retrieves and summarizes the chat history for a given session.""" - - try: - # history = Neo4jChatMessageHistory( - # graph=graph, - # session_id=session_id - # ) - chat_history = history.messages - - if not chat_history: - return "" - - if len(chat_history) > 4: - chat_history = chat_history[-4:] - - condense_template = f""" - Given the following earlier conversation, summarize the chat history. - Make sure to include all relevant information. 
- Chat History: {chat_history} - """ - chat_summary = llm.predict(condense_template) - return chat_summary - - except Exception as e: - logging.exception(f"Exception in retrieving chat history: {e}") - return "" - -def clear_chat_history(graph, session_id): - - try: - logging.info(f"Clearing chat history for session ID: {session_id}") - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - history.clear() - logging.info("Chat history cleared successfully") - - return { - "session_id": session_id, - "message": "The chat history is cleared", - "user": "chatbot" - } - except Exception as e: - logging.exception(f"Error occurred while clearing chat history for session ID {session_id}: {e}") - - -def extract_and_remove_source(message): - pattern = r'\[Source: ([^\]]+)\]' - match = re.search(pattern, message) - if match: - sources_string = match.group(1) - sources = [source.strip().strip("'") for source in sources_string.split(',')] - new_message = re.sub(pattern, '', message).strip() - response = { - "message" : new_message, - "sources" : sources - } - else: - response = { - "message" : message, - "sources" : [] - } - return response - -def clear_chat_history(graph,session_id): - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - history.clear() - return { - "session_id": session_id, - "message": "The chat History is cleared", - "user": "chatbot" - } - -def QA_RAG(graph,model,question,session_id): - logging.info(f"QA_RAG called at {datetime.now()}") - # model = "Gemini Pro" - try: - qa_rag_start_time = time.time() - - - start_time = time.time() - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name="vector", - retrieval_query=RETRIEVAL_QUERY, - graph=graph - ) - - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - - llm,model_version = get_llm(model=model,max_tokens=CHAT_MAX_TOKENS) - - qa = RetrievalQA.from_chain_type( - llm=llm, - chain_type="stuff", - 
retriever=neo_db.as_retriever(search_kwargs={'k': 3, "score_threshold": 0.7}), - return_source_documents=True - ) - # qa = RetrievalQAWithSourcesChain.from_chain_type( - # llm=llm, - # chain_type="stuff", - # retriever=neo_db.as_retriever(search_kwargs={'k': 3, "score_threshold": 0.7})) - - db_setup_time = time.time() - start_time - logging.info(f"DB Setup completed in {db_setup_time:.2f} seconds") - - start_time = time.time() - chat_summary = get_chat_history(llm,history) - chat_history_time = time.time() - start_time - logging.info(f"Chat history summarized in {chat_history_time:.2f} seconds") - # print(chat_summary) - - start_time = time.time() - vector_res = vector_embed_results(qa, question) - vector_time = time.time() - start_time - logging.info(f"Vector response obtained in {vector_time:.2f} seconds") - # print(vector_res) - - formatted_prompt = FINAL_PROMPT.format( - question=question, - chat_summary=chat_summary, - vector_result=vector_res.get('result', ''), - sources=vector_res.get('source', '') - ) - # print(formatted_prompt) - - start_time = time.time() - # llm = get_llm(model=model,embedding=False) - response = llm.predict(formatted_prompt) - predict_time = time.time() - start_time - logging.info(f"Response predicted in {predict_time:.2f} seconds") - - start_time = time.time() - ai_message = response - user_message = question - save_chat_history(history, user_message, ai_message) - chat_history_save = time.time() - start_time - logging.info(f"Chat History saved in {chat_history_save:.2f} seconds") - - response_data = extract_and_remove_source(response) - message = response_data["message"] - sources = response_data["sources"] - - print(f"message : {message}") - print(f"sources : {sources}") - total_call_time = time.time() - qa_rag_start_time - logging.info(f"Total Response time is {total_call_time:.2f} seconds") - return { - "session_id": session_id, - "message": message, - "info": { - "sources": sources, - "model":model_version, - 
"entities":vector_res["entities"] - }, - "user": "chatbot" - } - - except Exception as e: - logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") - error_name = type(e).__name__ - return { - "session_id": session_id, - "message": "Something went wrong", - "info": { - "sources": [], - "error": f"{error_name} :- {str(e)}" - }, - "user": "chatbot"} - - - - - diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index eeac78c1e..6fe4907f1 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -1,5 +1,4 @@ from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.graphs import Neo4jGraph import os from dotenv import load_dotenv import logging @@ -38,50 +37,178 @@ EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) - -def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): +def get_total_tokens(ai_response, llm): try: - if mode == "fulltext" or mode == "graph + vector + fulltext": - neo_db = Neo4jVector.from_existing_graph( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph, - search_type="hybrid", - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"], - keyword_index_name=keyword_index - ) - # neo_db = Neo4jVector.from_existing_index( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph, - # search_type="hybrid", - # keyword_index_name=keyword_index - # ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'") + if isinstance(llm, (ChatOpenAI, AzureChatOpenAI, ChatFireworks, ChatGroq)): + total_tokens = ai_response.response_metadata.get('token_usage', 
{}).get('total_tokens', 0) + + elif isinstance(llm, ChatVertexAI): + total_tokens = ai_response.response_metadata.get('usage_metadata', {}).get('prompt_token_count', 0) + + elif isinstance(llm, ChatBedrock): + total_tokens = ai_response.response_metadata.get('usage', {}).get('total_tokens', 0) + + elif isinstance(llm, ChatAnthropic): + input_tokens = int(ai_response.response_metadata.get('usage', {}).get('input_tokens', 0)) + output_tokens = int(ai_response.response_metadata.get('usage', {}).get('output_tokens', 0)) + total_tokens = input_tokens + output_tokens + + elif isinstance(llm, ChatOllama): + total_tokens = ai_response.response_metadata.get("prompt_eval_count", 0) + else: - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph + logging.warning(f"Unrecognized language model: {type(llm)}. Returning 0 tokens.") + total_tokens = 0 + + except Exception as e: + logging.error(f"Error retrieving total tokens: {e}") + total_tokens = 0 + + return total_tokens + +def clear_chat_history(graph, session_id): + try: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + + history.clear() + + return { + "session_id": session_id, + "message": "The chat history has been cleared.", + "user": "chatbot" + } + + except Exception as e: + logging.error(f"Error clearing chat history for session {session_id}: {e}") + return { + "session_id": session_id, + "message": "Failed to clear chat history.", + "user": "chatbot" + } + +def get_sources_and_chunks(sources_used, docs): + chunkdetails_list = [] + + sources_used_set = set(sources_used) + + for doc in docs: + try: + source = doc.metadata.get("source") + chunkdetails = doc.metadata.get("chunkdetails", []) + + if source in sources_used_set: + formatted_chunkdetails = [ + {**chunkdetail, "score": round(chunkdetail.get("score", 0), 4)} + for chunkdetail in chunkdetails + ] + 
chunkdetails_list.extend(formatted_chunkdetails) + + except Exception as e: + logging.error(f"Error processing document: {e}", exc_info=True) + + result = { + 'sources': sources_used, + 'chunkdetails': chunkdetails_list + } + return result + + +def get_rag_chain(llm, system_template=CHAT_SYSTEM_TEMPLATE): + try: + question_answering_prompt = ChatPromptTemplate.from_messages( + [ + ("system", system_template), + MessagesPlaceholder(variable_name="messages"), + ( + "human", + "User question: {input}" + ), + ] + ) + + question_answering_chain = question_answering_prompt | llm + + return question_answering_chain + + except Exception as e: + logging.error(f"Error creating RAG chain: {e}") + raise + +def format_documents(documents, model): + prompt_token_cutoff = 4 + for model_names, value in CHAT_TOKEN_CUT_OFF.items(): + if model in model_names: + prompt_token_cutoff = value + break + + sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True) + sorted_documents = sorted_documents[:prompt_token_cutoff] + + formatted_docs = [] + sources = set() + + for doc in sorted_documents: + try: + source = doc.metadata.get('source', 'unknown') + sources.add(source) + + formatted_doc = ( + "Document start\n" + f"This Document belongs to the source {source}\n" + f"Content: {doc.page_content}\n" + "Document end\n" ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") - document_names= list(map(str.strip, json.loads(document_names))) - if document_names: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': search_k, "score_threshold": score_threshold,'filter':{'fileName': {'$in': document_names}}}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}") - else: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': 
search_k, "score_threshold": score_threshold}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold}") - return retriever + formatted_docs.append(formatted_doc) + + except Exception as e: + logging.error(f"Error formatting document: {e}") + + return "\n\n".join(formatted_docs), sources + +def process_documents(docs, question, messages, llm, model): + start_time = time.time() + + try: + formatted_docs, sources = format_documents(docs, model) + + rag_chain = get_rag_chain(llm=llm) + + ai_response = rag_chain.invoke({ + "messages": messages[:-1], + "context": formatted_docs, + "input": question + }) + + result = get_sources_and_chunks(sources, docs) + content = ai_response.content + total_tokens = get_total_tokens(ai_response, llm) + + predict_time = time.time() - start_time + logging.info(f"Final response predicted in {predict_time:.2f} seconds") + except Exception as e: - logging.error(f"Error retrieving Neo4jVector index '{index_name}' or creating retriever: {e}") - raise Exception("An error occurred while retrieving the Neo4jVector index '{index_name}' or creating the retriever. 
Please drop and create a new vector index: {e}") from e + logging.error(f"Error processing documents: {e}") + raise + return content, result, total_tokens + +def retrieve_documents(doc_retriever, messages): + + start_time = time.time() + try: + docs = doc_retriever.invoke({"messages": messages}) + doc_retrieval_time = time.time() - start_time + logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") + + except Exception as e: + logging.error(f"Error retrieving documents: {e}") + raise + + return docs + def create_document_retriever_chain(llm, retriever): try: logging.info("Starting to create document retriever chain") @@ -124,182 +251,176 @@ def create_document_retriever_chain(llm, retriever): logging.error(f"Error creating document retriever chain: {e}", exc_info=True) raise - -def create_neo4j_chat_message_history(graph, session_id): - """ - Creates and returns a Neo4jChatMessageHistory instance. - - """ +def initialize_neo4j_vector(graph, chat_mode_settings): try: + mode = chat_mode_settings.get('mode', 'undefined') + retrieval_query = chat_mode_settings.get("retrieval_query") + index_name = chat_mode_settings.get("index_name") + keyword_index = chat_mode_settings.get("keyword_index", "") - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - return history + if not retrieval_query or not index_name: + raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.") - except Exception as e: - logging.error(f"Error creating Neo4jChatMessageHistory: {e}") - raise - -def format_documents(documents,model): - prompt_token_cutoff = 4 - for models,value in CHAT_TOKEN_CUT_OFF.items(): - if model in models: - prompt_token_cutoff = value - - sorted_documents = sorted(documents, key=lambda doc: doc.state["query_similarity_score"], reverse=True) - sorted_documents = sorted_documents[:prompt_token_cutoff] - - formatted_docs = [] - sources = set() + if mode in ["fulltext", "graph+vector+fulltext"]: + neo_db = 
Neo4jVector.from_existing_graph( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph, + search_type="hybrid", + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"], + keyword_index_name=keyword_index + ) + logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") + else: + neo_db = Neo4jVector.from_existing_index( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph + ) + logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") - for doc in sorted_documents: - source = doc.metadata['source'] - sources.add(source) - - formatted_doc = ( - "Document start\n" - f"This Document belongs to the source {source}\n" - f"Content: {doc.page_content}\n" - "Document end\n" + except Exception as e: + logging.error(f"Error retrieving Neo4jVector index : {e}") + raise + return neo_db + +def create_retriever(neo_db, document_names, search_k, score_threshold): + if document_names: + retriever = neo_db.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={ + 'k': search_k, + 'score_threshold': score_threshold, + 'filter': {'fileName': {'$in': document_names}} + } + ) + logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}") + else: + retriever = neo_db.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={'k': search_k, 'score_threshold': score_threshold} ) - formatted_docs.append(formatted_doc) + logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}") + return retriever - return "\n\n".join(formatted_docs), sources +def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): + try: -def 
get_rag_chain(llm,system_template=CHAT_SYSTEM_TEMPLATE): - question_answering_prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_template), - MessagesPlaceholder(variable_name="messages"), - ( - "human", - "User question: {input}" - ), - ] - ) - question_answering_chain = question_answering_prompt | llm + neo_db = initialize_neo4j_vector(graph, chat_mode_settings) + document_names= list(map(str.strip, json.loads(document_names))) + retriever = create_retriever(neo_db, document_names, search_k, score_threshold) + return retriever + except Exception as e: + logging.error(f"Error retrieving Neo4jVector index or creating retriever: {e}") + raise Exception("An error occurred while retrieving the Neo4jVector index or creating the retriever. Please drop and create a new vector index: {e}") from e - return question_answering_chain -def get_sources_and_chunks(sources_used, docs): - chunkdetails_list = [] - sources_used_set = set(sources_used) +def setup_chat(model, graph, document_names, chat_mode_settings): + start_time = time.time() + try: + if model == "diffbot": + model = "openai-gpt-4o" + + llm, model_name = get_llm(model=model) + logging.info(f"Model called in chat: {model} (version: {model_name})") - for doc in docs: - source = doc.metadata["source"] - chunkdetails = doc.metadata["chunkdetails"] - if source in sources_used_set: - chunkdetails = [{**chunkdetail, "score": round(chunkdetail["score"], 4)} for chunkdetail in chunkdetails] - chunkdetails_list.extend(chunkdetails) + retriever = get_neo4j_retriever(graph=graph, chat_mode_settings=chat_mode_settings, document_names=document_names) + doc_retriever = create_document_retriever_chain(llm, retriever) + + chat_setup_time = time.time() - start_time + logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") + + except Exception as e: + logging.error(f"Error during chat setup: {e}", exc_info=True) + raise + + return llm, doc_retriever, model_name - result = { - 'sources': sources_used, - 
'chunkdetails': chunkdetails_list - } - return result +def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings): + try: + llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) + + docs = retrieve_documents(doc_retriever, messages) + + if docs: + content, result, total_tokens = process_documents(docs, question, messages, llm, model) + else: + content = "I couldn't find any relevant documents to answer your question." + result = {"sources": [], "chunkdetails": []} + total_tokens = 0 + + ai_response = AIMessage(content=result["message"]) + messages.append(ai_response) + summarize_and_log(history, messages, llm) + + return { + "session_id": "", + "message": content, + "info": { + "sources": result["sources"], + "model": model_version, + "chunkdetails": result["chunkdetails"], + "total_tokens": total_tokens, + "response_time": 0, + "mode": chat_mode_settings["mode"] + }, + "user": "chatbot" + } + + except Exception as e: + logging.exception(f"Error processing chat response at {datetime.now()}: {str(e)}") + return { + "session_id": "", + "message": "Something went wrong", + "info": { + "sources": [], + "chunkdetails": [], + "total_tokens": 0, + "response_time": 0, + "error": f"{type(e).__name__}: {str(e)}", + "mode": chat_mode_settings["mode"] + }, + "user": "chatbot" + } -def summarize_messages(llm,history,stored_messages): - if len(stored_messages) == 0: +def summarize_and_log(history, stored_messages, llm): + if not stored_messages: + logging.info("No messages to summarize.") return False - summarization_prompt = ChatPromptTemplate.from_messages( - [ - MessagesPlaceholder(variable_name="chat_history"), - ( - "human", - "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information." 
- ), - ] - ) - - summarization_chain = summarization_prompt | llm - - summary_message = summarization_chain.invoke({"chat_history": stored_messages}) - - history.clear() - history.add_user_message("Our current convertaion summary till now") - history.add_message(summary_message) - return True - - -def get_total_tokens(ai_response,llm): - - if isinstance(llm,(ChatOpenAI,AzureChatOpenAI,ChatFireworks,ChatGroq)): - total_tokens = ai_response.response_metadata['token_usage']['total_tokens'] - elif isinstance(llm,(ChatVertexAI)): - total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count'] - elif isinstance(llm,(ChatBedrock)): - total_tokens = ai_response.response_metadata['usage']['total_tokens'] - elif isinstance(llm,(ChatAnthropic)): - input_tokens = int(ai_response.response_metadata['usage']['input_tokens']) - output_tokens = int(ai_response.response_metadata['usage']['output_tokens']) - total_tokens = input_tokens + output_tokens - elif isinstance(llm,(ChatOllama)): - total_tokens = ai_response.response_metadata["prompt_eval_count"] - else: - total_tokens = 0 - return total_tokens + try: + start_time = time.time() -def clear_chat_history(graph,session_id): - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id + summarization_prompt = ChatPromptTemplate.from_messages( + [ + MessagesPlaceholder(variable_name="chat_history"), + ( + "human", + "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information." 
+ ), + ] ) - history.clear() - return { - "session_id": session_id, - "message": "The chat History is cleared", - "user": "chatbot" - } + summarization_chain = summarization_prompt | llm -def setup_chat(model, graph, document_names,retrieval_query,mode): - start_time = time.time() - if model in ["diffbot"]: - model = "openai-gpt-4o" - llm,model_name = get_llm(model) - logging.info(f"Model called in chat {model} and model version is {model_name}") - retriever = get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query,document_names=document_names,mode=mode) - doc_retriever = create_document_retriever_chain(llm, retriever) - chat_setup_time = time.time() - start_time - logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") - - return llm, doc_retriever, model_name + summary_message = summarization_chain.invoke({"chat_history": stored_messages}) -def retrieve_documents(doc_retriever, messages): - start_time = time.time() - docs = doc_retriever.invoke({"messages": messages}) - doc_retrieval_time = time.time() - start_time - logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") - return docs + history.clear() + history.add_user_message("Our current conversation summary till now") + history.add_message(summary_message) -def process_documents(docs, question, messages, llm,model): - start_time = time.time() - formatted_docs, sources = format_documents(docs,model) - rag_chain = get_rag_chain(llm=llm) - ai_response = rag_chain.invoke({ - "messages": messages[:-1], - "context": formatted_docs, - "input": question - }) - result = get_sources_and_chunks(sources, docs) - content = ai_response.content - total_tokens = get_total_tokens(ai_response,llm) - - - predict_time = time.time() - start_time - logging.info(f"Final Response predicted in {predict_time:.2f} seconds") - - return content, result, total_tokens - -def summarize_and_log(history, messages, llm): - start_time = time.time() - summarize_messages(llm, history, messages) - 
history_summarized_time = time.time() - start_time - logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") + history_summarized_time = time.time() - start_time + logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") + return True + except Exception as e: + logging.error(f"An error occurred while summarizing messages: {e}") + return False + def create_graph_chain(model, graph): try: logging.info(f"Graph QA Chain using LLM model: {model}") @@ -346,79 +467,109 @@ def get_graph_response(graph_chain, question): except Exception as e: logging.error("An error occurred while getting the graph response : {e}") -def QA_RAG(graph, model, question, document_names,session_id, mode): +def process_graph_response(model, graph, question, messages, history): try: - logging.info(f"Chat Mode : {mode}") - history = create_neo4j_chat_message_history(graph, session_id) - messages = history.messages - user_question = HumanMessage(content=question) - messages.append(user_question) - - if mode == "graph": - graph_chain, qa_llm,model_version = create_graph_chain(model,graph) - graph_response = get_graph_response(graph_chain,question) - ai_response = AIMessage(content=graph_response["response"]) if graph_response["response"] else AIMessage(content="Something went wrong") - messages.append(ai_response) - summarize_and_log(history, messages, qa_llm) - - result = { - "session_id": session_id, - "message": graph_response["response"], - "info": { - "model": model_version, - 'cypher_query':graph_response["cypher_query"], - "context" : graph_response["context"], - "mode" : mode, - "response_time": 0 - }, - "user": "chatbot" - } - return result - elif mode == "vector" or mode == "fulltext": - retrieval_query = VECTOR_SEARCH_QUERY - else: - retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) - - llm, doc_retriever, model_version = setup_chat(model, graph, document_names,retrieval_query,mode) + 
graph_chain, qa_llm, model_version = create_graph_chain(model, graph) - docs = retrieve_documents(doc_retriever, messages) - - if docs: - content, result, total_tokens = process_documents(docs, question, messages, llm,model) - else: - content = "I couldn't find any relevant documents to answer your question." - result = {"sources": [], "chunkdetails": []} - total_tokens = 0 + graph_response = get_graph_response(graph_chain, question) + + ai_response_content = graph_response.get("response", "Something went wrong") + ai_response = AIMessage(content=ai_response_content) - ai_response = AIMessage(content=content) messages.append(ai_response) - summarize_and_log(history, messages, llm) + summarize_and_log(history, messages, qa_llm) - return { - "session_id": session_id, - "message": content, + result = { + "session_id": "", + "message": ai_response_content, "info": { - "sources": result["sources"], "model": model_version, - "chunkdetails": result["chunkdetails"], - "total_tokens": total_tokens, - "response_time": 0, - "mode": mode + "cypher_query": graph_response.get("cypher_query", ""), + "context": graph_response.get("context", ""), + "mode": "graph", + "response_time": 0 }, "user": "chatbot" } - + + return result + except Exception as e: - logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") - error_name = type(e).__name__ + logging.exception(f"Error processing graph response at {datetime.now()}: {str(e)}") return { - "session_id": session_id, + "session_id": "", "message": "Something went wrong", "info": { - "sources": [], - "chunkids": [], - "error": f"{error_name} :- {str(e)}", - "mode": mode + "model": model_version, + "cypher_query": "", + "context": "", + "mode": "graph", + "response_time": 0, + "error": f"{type(e).__name__}: {str(e)}" }, "user": "chatbot" } + +def create_neo4j_chat_message_history(graph, session_id): + """ + Creates and returns a Neo4jChatMessageHistory instance. 
+ + """ + try: + + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + return history + + except Exception as e: + logging.error(f"Error creating Neo4jChatMessageHistory: {e}") + raise + +def get_chat_mode_settings(mode): + chat_mode_settings = {} + + # logging.info(f"Received mode: {mode}") + + try: + if mode in ["vector", "fulltext"]: + chat_mode_settings["retrieval_query"] = VECTOR_SEARCH_QUERY + chat_mode_settings["index_name"] = "vector" + chat_mode_settings["keyword_index"] = "keyword" + elif mode == "local_community_search": + chat_mode_settings["retrieval_query"] = LOCAL_COMMUNITY_SEARCH_QUERY + chat_mode_settings["index_name"] = "entity_vector" + chat_mode_settings["keyword_index"] = "" + elif mode in ['graph+vector', 'graph+vector+fulltext']: + chat_mode_settings["retrieval_query"] = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) + chat_mode_settings["index_name"] = "vector" + chat_mode_settings["keyword_index"] = "keyword" + else: + raise ValueError(f"Invalid mode: {mode}") + + logging.info(f"Chat mode settings: {chat_mode_settings}") + + except Exception as e: + logging.error(f"Unexpected error: {e}", exc_info=True) + raise e + + return chat_mode_settings + +def QA_RAG(graph, model, question, document_names, session_id, mode): + logging.info(f"Chat Mode: {mode}") + chat_mode_settings = get_chat_mode_settings(mode=mode) + + history = create_neo4j_chat_message_history(graph, session_id) + messages = history.messages + + user_question = HumanMessage(content=question) + messages.append(user_question) + + if mode == "graph": + result = process_graph_response(model, graph, question, messages, history) + else: + result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) + + result["session_id"] = session_id + return result \ No newline at end of file diff --git a/backend/src/QA_optimization.py b/backend/src/QA_optimization.py deleted file mode 100644 
index 70c5ffdc8..000000000 --- a/backend/src/QA_optimization.py +++ /dev/null @@ -1,217 +0,0 @@ -import os -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.chains import GraphCypherQAChain -from langchain.graphs import Neo4jGraph -from langchain.chains import RetrievalQA -from langchain_openai import ChatOpenAI -from langchain_openai import OpenAIEmbeddings -import logging -from langchain_community.chat_message_histories import Neo4jChatMessageHistory -import asyncio -from datetime import datetime -from dotenv import load_dotenv -load_dotenv() - -# openai_api_key = os.environ.get('OPENAI_API_KEY') -# model_version='gpt-4-0125-preview' - -class ParallelComponent: - - def __init__(self, uri, userName, password, question, session_id): - self.uri = uri - self.userName = userName - self.password = password - self.question = question - self.session_id = session_id - self.model_version='gpt-4-0125-preview' - self.llm = ChatOpenAI(model= self.model_version, temperature=0) - - # async def execute(self): - # tasks = [] - - # tasks.append(asyncio.create_task(self._vector_embed_results())) - # tasks.append(asyncio.create_task(self._cypher_results())) - # tasks.append(asyncio.create_task(self._get_chat_history())) - - # return await asyncio.gather(*tasks) - async def execute(self): - tasks = [ - self._vector_embed_results(), - # self._cypher_results(), ## Disabled call for cypher_results - self._get_chat_history() - ] - return await asyncio.gather(*tasks) - - async def _vector_embed_results(self): - t=datetime.now() - print("Vector embeddings start time",t) - # retrieval_query=""" - # MATCH (node)-[:__PART_OF__]->(d:Document) - # WITH d, apoc.text.join(collect(node.text),"\n----\n") as text, avg(score) as score - # RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata - # """ - retrieval_query=""" - WITH node, score, apoc.text.join([ (node)-[:__HAS_ENTITY__]->(e) | 
head(labels(e)) + ": "+ e.id],", ") as entities - MATCH (node)-[:__PART_OF__]->(d:__Document__) - WITH d, apoc.text.join(collect(node.text + "\n" + entities),"\n----\n") as text, avg(score) as score - RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata - """ - vector_res={} - try: - neo_db=Neo4jVector.from_existing_index( - embedding=OpenAIEmbeddings(), - url=self.uri, - username=self.userName, - password=self.password, - database="neo4j", - index_name="vector", - retrieval_query=retrieval_query, - ) - # llm = ChatOpenAI(model= model_version, temperature=0) - - qa = RetrievalQA.from_chain_type( - llm=self.llm, chain_type="stuff", retriever=neo_db.as_retriever(search_kwargs={'k': 3,"score_threshold": 0.5}), return_source_documents=True - ) - - result = qa({"query": self.question}) - vector_res['result']=result.get("result") - list_source_docs=[] - for i in result["source_documents"]: - list_source_docs.append(i.metadata['source']) - vector_res['source']=list_source_docs - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in vector embedding in QA component:{error_message}') - # raise Exception(error_message) - print("Vector embeddings duration time",datetime.now()-t) - return vector_res - - - async def _cypher_results(self): - try: - t=datetime.now() - print("Cypher QA start time",t) - cypher_res={} - graph = Neo4jGraph( - url=self.uri, - username=self.userName, - password=self.password - ) - - - graph.refresh_schema() - cypher_chain = GraphCypherQAChain.from_llm( - graph=graph, - cypher_llm=ChatOpenAI(temperature=0, model=self.model_version), - qa_llm=ChatOpenAI(temperature=0, model=self.model_version), - validate_cypher=True, # Validate relationship directions - verbose=True, - top_k=2 - ) - try: - cypher_res=cypher_chain.invoke({"query": question}) - except: - cypher_res={} - - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in 
CypherQAChain in QA component:{error_message}') - # raise Exception(error_message) - print("Cypher QA duration",datetime.now()-t) - return cypher_res - - async def _get_chat_history(self): - try: - t=datetime.now() - print("Get chat history start time:",t) - history = Neo4jChatMessageHistory( - url=self.uri, - username=self.userName, - password=self.password, - session_id=self.session_id - ) - chat_history=history.messages - - if len(chat_history)==0: - return {"result":""} - condense_template = f"""Given the following earlier conversation , Summarise the chat history.Make sure to include all the relevant information. - Chat History: - {chat_history}""" - chat_summary=self.llm.predict(condense_template) - print("Get chat history duration time:",datetime.now()-t) - return {"result":chat_summary} - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in retrieving chat history:{error_message}') - # raise Exception(error_message) - return {"result":''} - - async def final_prompt(self,chat_summary,cypher_res,vector_res): - t=datetime.now() - print('Final prompt start time:',t) - final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze - and synthesize information from two sources: the top result from a similarity search - (unstructured information) and relevant data from a graph database (structured information). - If structured information fails to find an answer then use the answer from unstructured information - and vice versa. I only want a straightforward answer without mentioning from which source you got the answer. You are also receiving - a chat history of the earlier conversation. You should be able to understand the context from the chat history and answer the question. - Given the user's query: {self.question}, provide a meaningful and efficient answer based - on the insights derived from the following data: - chat_summary:{chat_summary} - Structured information: {cypher_res}. 
- Unstructured information: {vector_res}. - - """ - print(final_prompt) - response = self.llm.predict(final_prompt) - ai_message=response - user_message=question - print('Final prompt duration',datetime.now()-t) - return ai_message,user_message - - - async def _save_chat_history(self,ai_message,user_message): - try: - history = Neo4jChatMessageHistory( - url=self.uri, - username=self.userName, - password=self.password, - session_id=self.session_id - ) - history.add_user_message(user_message) - history.add_ai_message(ai_message) - logging.info(f'Successfully saved chat history') - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in saving chat history:{error_message}') - raise Exception(error_message) - -# Usage example: - -uri=os.environ.get('NEO4J_URI') -userName=os.environ.get('NEO4J_USERNAME') -password=os.environ.get('NEO4J_PASSWORD') -question='Do you know my name?' -session_id=2 - -async def main(uri,userName,password,question,session_id): - t=datetime.now() - parallel_component = ParallelComponent(uri, userName, password, question, session_id) - f_results=await parallel_component.execute() - print(f_results) - # f_vector_result=f_results[0]['result'] - # f_cypher_result=f_results[1].get('result','') - # f_chat_summary=f_results[2]['result'] - f_vector_result=f_results[0]['result'] - f_cypher_result = "" # Passing Empty string for cypher_result - f_chat_summary=f_results[1]['result'] - print(f_vector_result) - print(f_cypher_result) - print(f_chat_summary) - ai_message,user_message=await parallel_component.final_prompt(f_chat_summary,f_cypher_result,f_vector_result) - # print(asyncio.gather(asyncio.create_taskparallel_component.final_prompt(f_chat_summary,f_cypher_result,f_vector_result))) - await parallel_component._save_chat_history(ai_message,user_message) - print("Total Time taken:",datetime.now()-t) - print("Response from AI:",ai_message) -# Run with an event loop 
-asyncio.run(main(uri,userName,password,question,session_id)) \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c5f8e98a4..c4d56f963 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -276,4 +276,7 @@ RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ + +LOCAL_COMMUNITY_SEARCH_QUERY = """""" + YOUTUBE_CHUNK_SIZE_SECONDS = 60 From 6ba42e0a34934b7c0382de0e7338ba61ebcd6603 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:59:03 +0000 Subject: [PATCH 049/292] allow credentials true changes --- backend/score.py | 1 - frontend/src/components/Content.tsx | 5 ++++- frontend/src/services/ServerSideStatusUpdateAPI.ts | 5 +---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/backend/score.py b/backend/score.py index 15b381e83..7db8ab069 100644 --- a/backend/score.py +++ b/backend/score.py @@ -46,7 +46,6 @@ def sick(): app.add_middleware( CORSMiddleware, allow_origins=["*"], - allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 877c1178b..12213f66b 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -125,7 +125,10 @@ const Content: React.FC = ({ useEffect(() => { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { - return { ...curfile, model: curfile.status === 'New' ? model : curfile.model }; + return { + ...curfile, + model: curfile.status === 'New' || curfile.status === 'Reprocess' ? 
model : curfile.model, + }; }); }); }, [model]); diff --git a/frontend/src/services/ServerSideStatusUpdateAPI.ts b/frontend/src/services/ServerSideStatusUpdateAPI.ts index dfef3a40f..547a929a8 100644 --- a/frontend/src/services/ServerSideStatusUpdateAPI.ts +++ b/frontend/src/services/ServerSideStatusUpdateAPI.ts @@ -8,10 +8,7 @@ export function triggerStatusUpdateAPI( database: string, datahandler: (i: eventResponsetypes) => void ) { - let encodedstr; - if (password) { - encodedstr = btoa(password); - } + let encodedstr: string = password ? btoa(password) : ''; const eventSource = new EventSource( `${url()}/update_extract_status/${name}?url=${uri}&userName=${username}&password=${encodedstr}&database=${database}` ); From 771753bf8bca4039c1dbcdd61096668da0aa1daa Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 5 Sep 2024 12:07:22 +0000 Subject: [PATCH 050/292] reset the values to 0 when the retry option is start from begining --- frontend/src/components/Content.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 12213f66b..939b973ec 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -513,15 +513,16 @@ const Content: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } + const isStartFromBegining = retryoption === RETRY_OPIONS[0]; setFilesData((prev) => { return prev.map((f) => { return f.name === filename ? { ...f, status: 'Reprocess', - processingProgress: retryoption.includes('start_from_beginning') ? 0 : f.processingProgress, - NodesCount: retryoption === RETRY_OPIONS[1] ? 0 : f.NodesCount, - relationshipCount: retryoption === RETRY_OPIONS[1] ? 0 : f.relationshipCount, + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + NodesCount: isStartFromBegining ? 
0 : f.NodesCount, + relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, } : f; }); From d56a60d426ef9bbc78ff0b4c34d07c26aa5cd608 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 5 Sep 2024 12:11:45 +0000 Subject: [PATCH 051/292] Update Node/Document status using SSE, Trying to fix Cancelled by Scope error --- backend/score.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/backend/score.py b/backend/score.py index 7db8ab069..52bd166bb 100644 --- a/backend/score.py +++ b/backend/score.py @@ -433,25 +433,25 @@ async def generate(): logging.info(" SSE Client disconnected") break # get the current status of document node - graph = create_graph_database_connection(uri, userName, decoded_password, database) - graphDb_data_Access = graphDBdataAccess(graph) - result = graphDb_data_Access.get_current_status_document_node(file_name) - print(f'Result of document status in SSE : {result}') - if len(result) > 0: - status = json.dumps({'fileName':file_name, - 'status':result[0]['Status'], - 'processingTime':result[0]['processingTime'], - 'nodeCount':result[0]['nodeCount'], - 'relationshipCount':result[0]['relationshipCount'], - 'model':result[0]['model'], - 'total_chunks':result[0]['total_chunks'], - 'fileSize':result[0]['fileSize'], - 'processed_chunk':result[0]['processed_chunk'], - 'fileSource':result[0]['fileSource'] - }) + else: - status = json.dumps({'fileName':file_name, 'status':'Failed'}) - yield status + graph = create_graph_database_connection(uri, userName, decoded_password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.get_current_status_document_node(file_name) + print(f'Result of document status in SSE : {result}') + if len(result) > 0: + status = json.dumps({'fileName':file_name, + 'status':result[0]['Status'], + 'processingTime':result[0]['processingTime'], + 
'nodeCount':result[0]['nodeCount'], + 'relationshipCount':result[0]['relationshipCount'], + 'model':result[0]['model'], + 'total_chunks':result[0]['total_chunks'], + 'fileSize':result[0]['fileSize'], + 'processed_chunk':result[0]['processed_chunk'], + 'fileSource':result[0]['fileSource'] + }) + yield status except asyncio.CancelledError: logging.info("SSE Connection cancelled") From 3f98459e3100550be57c86282fe0d6003c4e0771 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:13:55 +0000 Subject: [PATCH 052/292] modified the chat mode settings --- backend/src/QA_integration_new.py | 126 +++++++++++++++++++----------- 1 file changed, 79 insertions(+), 47 deletions(-) diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 6fe4907f1..e595a820e 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -1,29 +1,26 @@ -from langchain_community.vectorstores.neo4j_vector import Neo4jVector import os +import json +from datetime import datetime +import time +from typing import Any + from dotenv import load_dotenv import logging + +# LangChain imports +from langchain_community.vectorstores.neo4j_vector import Neo4jVector from langchain_community.chat_message_histories import Neo4jChatMessageHistory -from src.llm import get_llm -from src.shared.common_fn import load_embedding_model -import re -from typing import Any -from datetime import datetime -import time from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableBranch from langchain.retrievers import ContextualCompressionRetriever from langchain_community.document_transformers import EmbeddingsRedundantFilter -from langchain.retrievers.document_compressors import EmbeddingsFilter -from langchain.retrievers.document_compressors import DocumentCompressorPipeline +from 
langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_text_splitters import TokenTextSplitter -from langchain_core.messages import HumanMessage,AIMessage -from src.shared.constants import * -from src.llm import get_llm +from langchain_core.messages import HumanMessage, AIMessage from langchain.chains import GraphCypherQAChain -import json -## Chat models +# LangChain chat models from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_google_vertexai import ChatVertexAI from langchain_groq import ChatGroq @@ -32,6 +29,11 @@ from langchain_aws import ChatBedrock from langchain_community.chat_models import ChatOllama +# Local imports +from src.llm import get_llm +from src.shared.common_fn import load_embedding_model +from src.shared.constants import * + load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') @@ -91,23 +93,27 @@ def clear_chat_history(graph, session_id): def get_sources_and_chunks(sources_used, docs): chunkdetails_list = [] - sources_used_set = set(sources_used) + seen_ids_and_scores = set() for doc in docs: try: source = doc.metadata.get("source") chunkdetails = doc.metadata.get("chunkdetails", []) - + if source in sources_used_set: - formatted_chunkdetails = [ - {**chunkdetail, "score": round(chunkdetail.get("score", 0), 4)} - for chunkdetail in chunkdetails - ] - chunkdetails_list.extend(formatted_chunkdetails) - + for chunkdetail in chunkdetails: + id = chunkdetail.get("id") + score = round(chunkdetail.get("score", 0), 4) + + id_and_score = (id, score) + + if id_and_score not in seen_ids_and_scores: + seen_ids_and_scores.add(id_and_score) + chunkdetails_list.append({**chunkdetail, "score": score}) + except Exception as e: - logging.error(f"Error processing document: {e}", exc_info=True) + logging.error(f"Error processing document: {e}") result = { 'sources': sources_used, @@ -261,7 +267,7 @@ def initialize_neo4j_vector(graph, chat_mode_settings): if not retrieval_query or 
not index_name: raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.") - if mode in ["fulltext", "graph+vector+fulltext"]: + if keyword_index: neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, @@ -281,15 +287,15 @@ def initialize_neo4j_vector(graph, chat_mode_settings): retrieval_query=retrieval_query, graph=graph ) + logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") - except Exception as e: logging.error(f"Error retrieving Neo4jVector index : {e}") raise return neo_db -def create_retriever(neo_db, document_names, search_k, score_threshold): - if document_names: +def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_threshold): + if document_names and chat_mode_settings["document_filter"]: retriever = neo_db.as_retriever( search_type="similarity_score_threshold", search_kwargs={ @@ -312,7 +318,7 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_ neo_db = initialize_neo4j_vector(graph, chat_mode_settings) document_names= list(map(str.strip, json.loads(document_names))) - retriever = create_retriever(neo_db, document_names, search_k, score_threshold) + retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: logging.error(f"Error retrieving Neo4jVector index or creating retriever: {e}") @@ -353,7 +359,7 @@ def process_chat_response(messages,history, question, model, graph, document_nam result = {"sources": [], "chunkdetails": []} total_tokens = 0 - ai_response = AIMessage(content=result["message"]) + ai_response = AIMessage(content=content) messages.append(ai_response) summarize_and_log(history, messages, llm) @@ -528,37 +534,61 @@ def create_neo4j_chat_message_history(graph, session_id): raise def get_chat_mode_settings(mode): - chat_mode_settings = {} - # logging.info(f"Received mode: {mode}") + settings_map = { + "vector": { + 
"retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "fulltext": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + }, + "local_community_search": { + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY, + "index_name": "entity_vector", + "keyword_index": None, + "document_filter": False + }, + "graph+vector": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "graph+vector+fulltext": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + } + } + + default_settings = { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": False + } try: - if mode in ["vector", "fulltext"]: - chat_mode_settings["retrieval_query"] = VECTOR_SEARCH_QUERY - chat_mode_settings["index_name"] = "vector" - chat_mode_settings["keyword_index"] = "keyword" - elif mode == "local_community_search": - chat_mode_settings["retrieval_query"] = LOCAL_COMMUNITY_SEARCH_QUERY - chat_mode_settings["index_name"] = "entity_vector" - chat_mode_settings["keyword_index"] = "" - elif mode in ['graph+vector', 'graph+vector+fulltext']: - chat_mode_settings["retrieval_query"] = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) - chat_mode_settings["index_name"] = "vector" - chat_mode_settings["keyword_index"] = "keyword" - else: - raise ValueError(f"Invalid mode: {mode}") + chat_mode_settings = settings_map.get(mode, default_settings) + chat_mode_settings["mode"] = mode logging.info(f"Chat mode settings: {chat_mode_settings}") except Exception as e: logging.error(f"Unexpected error: {e}", exc_info=True) - raise e 
+ raise return chat_mode_settings def QA_RAG(graph, model, question, document_names, session_id, mode): logging.info(f"Chat Mode: {mode}") - chat_mode_settings = get_chat_mode_settings(mode=mode) history = create_neo4j_chat_message_history(graph, session_id) messages = history.messages @@ -569,7 +599,9 @@ def QA_RAG(graph, model, question, document_names, session_id, mode): if mode == "graph": result = process_graph_response(model, graph, question, messages, history) else: + chat_mode_settings = get_chat_mode_settings(mode=mode) result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) result["session_id"] = session_id + return result \ No newline at end of file From 09b0180af013d32921a87decd20add7fad987605 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:27:14 +0000 Subject: [PATCH 053/292] renamed QA_integration --- backend/score.py | 2 +- backend/src/{QA_integration_new.py => QA_integration.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename backend/src/{QA_integration_new.py => QA_integration.py} (100%) diff --git a/backend/score.py b/backend/score.py index 19f7ddb1a..329c8f5b2 100644 --- a/backend/score.py +++ b/backend/score.py @@ -2,7 +2,7 @@ from fastapi_health import health from fastapi.middleware.cors import CORSMiddleware from src.main import * -from src.QA_integration_new import * +from src.QA_integration import * from src.entities.user_credential import user_credential from src.shared.common_fn import * import uvicorn diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration.py similarity index 100% rename from backend/src/QA_integration_new.py rename to backend/src/QA_integration.py From 19b7bf2f67af47484062872e71c9373bc3984647 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:43:09 +0000 Subject: [PATCH 054/292] 
added graphdatascience --- backend/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/requirements.txt b/backend/requirements.txt index 46c57aea5..b83a76b82 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -179,3 +179,4 @@ sentence-transformers==3.0.1 google-cloud-logging==3.10.0 PyMuPDF==1.24.5 pypandoc==1.13 +graphdatascience==1.10 From c91febba99224ef963065f059f46835165d120ce Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:49:41 +0000 Subject: [PATCH 055/292] moved settings to constants --- backend/src/QA_integration.py | 44 ++------------------------------- backend/src/shared/constants.py | 38 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index e595a820e..72c1232fa 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -533,48 +533,8 @@ def create_neo4j_chat_message_history(graph, session_id): logging.error(f"Error creating Neo4jChatMessageHistory: {e}") raise -def get_chat_mode_settings(mode): - - settings_map = { - "vector": { - "retrieval_query": VECTOR_SEARCH_QUERY, - "index_name": "vector", - "keyword_index": None, - "document_filter": True - }, - "fulltext": { - "retrieval_query": VECTOR_SEARCH_QUERY, - "index_name": "vector", - "keyword_index": "keyword", - "document_filter": False - }, - "local_community_search": { - "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY, - "index_name": "entity_vector", - "keyword_index": None, - "document_filter": False - }, - "graph+vector": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), - "index_name": "vector", - "keyword_index": None, - "document_filter": True - }, - "graph+vector+fulltext": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), - 
"index_name": "vector", - "keyword_index": "keyword", - "document_filter": False - } - } - - default_settings = { - "retrieval_query": VECTOR_SEARCH_QUERY, - "index_name": "vector", - "keyword_index": None, - "document_filter": False - } - +def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): + default_settings = settings_map["default"] try: chat_mode_settings = settings_map.get(mode, default_settings) chat_mode_settings["mode"] = mode diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c4d56f963..303e8d656 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -279,4 +279,42 @@ LOCAL_COMMUNITY_SEARCH_QUERY = """""" +CHAT_MODE_CONFIG_MAP= { + "vector": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "fulltext": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + }, + "local_community_search": { + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY, + "index_name": "entity_vector", + "keyword_index": None, + "document_filter": False + }, + "graph+vector": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "graph+vector+fulltext": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + }, + "default": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": True + } + } YOUTUBE_CHUNK_SIZE_SECONDS = 60 From 169c2727820ea0469c88bf6ccfa10a46617442fe Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:36:10 +0000 Subject: [PATCH 056/292] 
modified constants --- backend/src/shared/constants.py | 141 +++++++++++++------------------- 1 file changed, 56 insertions(+), 85 deletions(-) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 303e8d656..c15602e25 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -68,7 +68,6 @@ ("ollama_llama3") : 2 } - ### CHAT TEMPLATES CHAT_SYSTEM_TEMPLATE = """ You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources. @@ -115,7 +114,6 @@ QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." - ## CHAT QUERIES VECTOR_SEARCH_QUERY = """ WITH node AS chunk, score @@ -130,88 +128,6 @@ {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} as metadata """ -# VECTOR_GRAPH_SEARCH_QUERY=""" -# WITH node as chunk, score -# MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -# CALL { WITH chunk -# MATCH (chunk)-[:__HAS_ENTITY__]->(e) -# MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,2}(:!__Chunk__&!__Document__) -# UNWIND rels as r -# RETURN collect(distinct r) as rels -# } -# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -# WITH d, avg_score, -# [c IN chunks | c.chunk.text] AS texts, -# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, -# [r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities -# WITH d, avg_score,chunkdetails, -# apoc.text.join(texts,"\n----\n") + -# 
apoc.text.join(entities,"\n") -# as text -# RETURN text, avg_score AS score, {source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata -# """ - - -# VECTOR_GRAPH_SEARCH_QUERY = """ -# WITH node as chunk, score -# // find the document of the chunk -# MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -# // fetch entities -# CALL { WITH chunk -# // entities connected to the chunk -# // todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks -# MATCH (chunk)-[:__HAS_ENTITY__]->(e) - -# // depending on match to query embedding either 1 or 2 step expansion -# WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95 -# THEN -# collect { MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,1}(:!Chunk&!__Document__) RETURN path } -# ELSE -# collect { MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,2}(:!Chunk&!__Document__) RETURN path } -# END as paths - -# RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels, -# collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes -# } -# // aggregate chunk-details and de-duplicate nodes and relationships -# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels, - -# // TODO sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes? 
-# apoc.coll.toSet(apoc.coll.flatten(collect( -# [r in rels |[startNode(r),endNode(r)]]),true)) as nodes - -# // generate metadata and text components for chunks, nodes and relationships -# WITH d, avg_score, -# [c IN chunks | c.chunk.text] AS texts, -# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, -# apoc.coll.sort([n in nodes | - -# coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+ -# n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts, -# apoc.coll.sort([r in rels -# // optional filter if we limit the node-set -# // WHERE startNode(r) in nodes AND endNode(r) in nodes -# | -# coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ -# startNode(r).id + -# " " + type(r) + " " + -# coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + -# endNode(r).id -# ]) as relTexts - -# // combine texts into response-text -# WITH d, avg_score,chunkdetails, -# "Text Content:\n" + -# apoc.text.join(texts,"\n----\n") + -# "\n----\nEntities:\n"+ -# apoc.text.join(nodeTexts,"\n") + -# "\n----\nRelationships:\n"+ -# apoc.text.join(relTexts,"\n") - -# as text -# RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata -# """ - VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25 VECTOR_GRAPH_SEARCH_QUERY = """ @@ -277,7 +193,62 @@ RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ -LOCAL_COMMUNITY_SEARCH_QUERY = """""" + +### Local community search +LOCAL_COMMUNITY_TOP_CHUNKS = 3 +LOCAL_COMMUNITY_TOP_COMMUNITIES = 3 +LOCAL_COMMUNITY_TOP_OUTSIDE_RELS = 10 +LOCAL_COMMUNITY_TOP_INSIDE_RELS = 10 +LOCAL_COMMUNITY_TOP_ENTITIES = 10 + +LOCAL_COMMUNITY_SEARCH_QUERY = """ +WITH collect(node) as nodes +WITH 
+collect { + UNWIND nodes as n + MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) + WITH c, count(distinct n) as freq + RETURN c.text AS chunkText + ORDER BY freq DESC + LIMIT $topChunks +} AS text_mapping, +// Entity - Report Mapping +collect { + UNWIND nodes as n + MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) + WITH c, c.rank as rank, c.weight AS weight + RETURN c.summary + ORDER BY rank, weight DESC + LIMIT $topCommunities +} AS report_mapping, +// Outside Relationships +collect { + UNWIND nodes as n + MATCH (n)-[r:RELATED]-(m) + WHERE NOT m IN nodes + RETURN r.description AS descriptionText + ORDER BY r.rank, r.weight DESC + LIMIT $topOutsideRels +} as outsideRels, +// Inside Relationships +collect { + UNWIND nodes as n + MATCH (n)-[r:RELATED]-(m) + WHERE m IN nodes + RETURN r.description AS descriptionText + ORDER BY r.rank, r.weight DESC + LIMIT $topInsideRels +} as insideRels, +// Entities description +collect { + UNWIND nodes as n + RETURN n.description AS descriptionText +} as entities +// We don't have covariates or claims here +RETURN {Chunks: text_mapping, Reports: report_mapping, + Relationships: outsideRels + insideRels, + Entities: entities} AS text, 1.0 AS score, {} AS metadata +""" CHAT_MODE_CONFIG_MAP= { "vector": { From ba131448d313e97e7d6fe8816a230ad4096ebb33 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 6 Sep 2024 06:36:36 +0000 Subject: [PATCH 057/292] resetting the nodescount and relationshipcount --- frontend/src/components/Content.tsx | 2 +- frontend/src/components/FileTable.tsx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 939b973ec..c77660212 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -513,7 +513,7 @@ const Content: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } - const 
isStartFromBegining = retryoption === RETRY_OPIONS[0]; + const isStartFromBegining = retryoption === RETRY_OPIONS[0] || retryoption===RETRY_OPIONS[1]; setFilesData((prev) => { return prev.map((f) => { return f.name === filename diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index aa4d9bb9a..04e2a8841 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -146,7 +146,7 @@ const FileTable = forwardRef((props, ref) => { > {info.getValue()} - {(info.getValue() === 'Completed' || + {/* {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && ( @@ -161,7 +161,7 @@ const FileTable = forwardRef((props, ref) => { - )} + )} */}
); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { From 4cc8104a0996c5d81ec8eb054d9439b0f8bb0528 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 6 Sep 2024 07:46:48 +0000 Subject: [PATCH 058/292] Add vector index exist condition to create --- backend/src/make_relationships.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 3045649d5..4783c5abe 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -67,20 +67,23 @@ def update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name # ) # logging.info('create vector index on chunk embedding') result = graph.query("SHOW INDEXES YIELD * WHERE labelsOrTypes = ['__Chunk__'] and name = 'vector'") + vector_index = graph.query("SHOW INDEXES YIELD * WHERE labelsOrTypes = ['Chunk'] and type = 'VECTOR' AND name = 'vector' return options") if result: logging.info(f"vector index dropped for 'Chunk'") graph.query("DROP INDEX vector IF EXISTS;") - graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding) - OPTIONS {indexConfig: { - `vector.dimensions`: $dimensions, - `vector.similarity_function`: 'cosine' - }} - """, - { - "dimensions" : dimension - } - ) + if len(vector_index) == 0: + logging.info(f'vector index is not exist, will create in next query') + graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding) + OPTIONS {indexConfig: { + `vector.dimensions`: $dimensions, + `vector.similarity_function`: 'cosine' + }} + """, + { + "dimensions" : dimension + } + ) query_to_create_embedding = """ UNWIND $data AS row From 1222942539022e0c3ea0ee79dca9845fc6c95d67 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 6 Sep 2024 20:27:25 
+0530 Subject: [PATCH 059/292] Science Molecule & database icon addition (#722) * DataScience icon addition * added gds status to connect call * icon stroke changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> --- backend/src/graphDB_dataAccess.py | 32 ++++++++++++++++--- frontend/src/components/ChatBot/Chatbot.tsx | 2 +- .../ChatBot/ExpandedChatButtonContainer.tsx | 2 +- frontend/src/components/Content.tsx | 16 ++++++---- .../components/DataSources/Local/DropZone.tsx | 2 +- frontend/src/components/FileTable.tsx | 2 +- .../src/components/Graph/GraphViewModal.tsx | 2 +- frontend/src/components/Layout/Header.tsx | 2 +- frontend/src/components/Layout/SideNav.tsx | 2 +- frontend/src/components/UI/DatabaseIcon.tsx | 25 +++++++++++++++ .../src/components/UI/IconButtonToolTip.tsx | 27 ++++++++++++++-- .../WebSources/GenericSourceButton.tsx | 2 +- frontend/src/types.ts | 6 ++++ frontend/src/utils/Constants.ts | 8 +++++ 14 files changed, 108 insertions(+), 22 deletions(-) create mode 100644 frontend/src/components/UI/DatabaseIcon.tsx diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index c77a1e773..7866c81c4 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -140,6 +140,27 @@ def update_KNN_graph(self): ) else: logging.info("Vector index does not exist, So KNN graph not update") + + def check_gds_version(self): + try: + gds_procedure_count = """ + SHOW PROCEDURES + YIELD name + WHERE name STARTS WITH "gds." 
+ RETURN COUNT(*) AS totalGdsProcedures + """ + result = self.graph.query(gds_procedure_count) + total_gds_procedures = result[0]['totalGdsProcedures'] if result else 0 + + if total_gds_procedures > 0: + logging.info("GDS is available in the database.") + return True + else: + logging.info("GDS is not available in the database.") + return False + except Exception as e: + logging.error(f"An error occurred while checking GDS version: {e}") + return False def connection_check_and_get_vector_dimensions(self): """ @@ -166,19 +187,20 @@ def connection_check_and_get_vector_dimensions(self): embedding_model = os.getenv('EMBEDDING_MODEL') embeddings, application_dimension = load_embedding_model(embedding_model) logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}') - # print(chunks_exists) + + gds_status = self.check_gds_version() if self.graph: if len(db_vector_dimension) > 0: - return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful"} + return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status} else: if len(db_vector_dimension) == 0 and len(result_chunks) == 0: logging.info("Chunks and vector index does not exists in database") - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False,"gds_status":gds_status} elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0: - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True} + return {'db_vector_dimension': 0, 
'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True,"gds_status":gds_status} else: - return {'message':"Connection Successful"} + return {'message':"Connection Successful","gds_status":gds_status} def execute_query(self, query, param=None): return self.graph.query(query, param) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index da5aaaf29..05541aeff 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -14,7 +14,7 @@ import { v4 as uuidv4 } from 'uuid'; import { useFileContext } from '../../context/UsersFiles'; import clsx from 'clsx'; import ReactMarkdown from 'react-markdown'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { buttonCaptions, tooltips } from '../../utils/Constants'; import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx index 49925466f..465687c4b 100644 --- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -2,7 +2,7 @@ import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; import ChatModeToggle from './ChatModeToggle'; import { Box, IconButton } from '@neo4j-ndl/react'; import { IconProps } from '../../types'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; import { useState } from 'react'; import { RiChatSettingsLine } from 'react-icons/ri'; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 4bde5a9b4..8f42202e4 100644 --- 
a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -32,6 +32,7 @@ import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; import axios from 'axios'; +import DatabaseStatusIcon from './UI/DatabaseIcon'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -86,6 +87,7 @@ const Content: React.FC = ({ alertType: 'error', alertMessage: '', }); + const isGdsActive = true; const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { setalertDetails({ @@ -651,6 +653,8 @@ const Content: React.FC = ({ } }; + console.log('isGds', isGdsActive); + return ( <> {alertDetails.showAlert && ( @@ -711,16 +715,14 @@ const Content: React.FC = ({ chunksExistsWithDifferentEmbedding={openConnection.chunksExistsWithDifferentDimension} />
-
Neo4j connection - {!connectionStatus ? : } - {connectionStatus ? ( - {userCredentials?.uri} - ) : ( - Not Connected - )} +
{!isSchema ? ( diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index 63ff08e37..d0bf429a5 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -8,7 +8,7 @@ import CustomAlert from '../../UI/Alert'; import { CustomFile, CustomFileBase, UserCredentials, alertStateType } from '../../../types'; import { buttonCaptions, chunkSize } from '../../../utils/Constants'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from '../../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../../UI/IconButtonToolTip'; import { uploadAPI } from '../../../utils/FileAPI'; const DropZone: FunctionComponent = () => { diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 23310eaf4..9dad8b18d 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -45,7 +45,7 @@ import useServerSideEvent from '../hooks/useSse'; import { AxiosError } from 'axios'; import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; import cancelAPI from '../services/CancelAPI'; -import IconButtonWithToolTip from './UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import IndeterminateCheckbox from './UI/CustomCheckBox'; let onlyfortheFirstRender = true; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 39438b788..07f276e3f 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -30,7 +30,7 @@ import { MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { 
IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { filterData, processGraphData, sortAlphabetically } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; import { LegendsChip } from './LegendsChip'; diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 345a004fd..acf62eefa 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -8,7 +8,7 @@ import { } from '@neo4j-ndl/react/icons'; import { Typography } from '@neo4j-ndl/react'; import { useCallback, useEffect } from 'react'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; import { useFileContext } from '../../context/UsersFiles'; diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index edbfb4d29..bfeb27367 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -17,7 +17,7 @@ import ExpandedChatButtonContainer from '../ChatBot/ExpandedChatButtonContainer' import { APP_SOURCES, tooltips } from '../../utils/Constants'; import ChatModeToggle from '../ChatBot/ChatModeToggle'; import { RiChatSettingsLine } from 'react-icons/ri'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import GCSButton from '../DataSources/GCS/GCSButton'; import S3Component from '../DataSources/AWS/S3Bucket'; import WebButton from '../DataSources/Web/WebButton'; diff --git a/frontend/src/components/UI/DatabaseIcon.tsx b/frontend/src/components/UI/DatabaseIcon.tsx new file mode 100644 index 000000000..ae291e401 --- /dev/null +++ b/frontend/src/components/UI/DatabaseIcon.tsx @@ -0,0 +1,25 @@ +import { CircleStackIconOutline, ScienceMoleculeIcon } from '@neo4j-ndl/react/icons'; +import { 
IconWithToolTip } from './IconButtonToolTip'; +import { DatabaseStatusProps } from '../../types'; +import { connectionLabels } from '../../utils/Constants'; + +const DatabaseStatusIcon: React.FC = ({ isConnected, isGdsActive, uri }) => { + const iconStyle = { fill: 'none', stroke: isConnected ? connectionLabels.greenStroke : connectionLabels.redStroke }; + const text = isGdsActive ? connectionLabels.graphDataScience : connectionLabels.graphDatabase; + return ( +
+ + + {isGdsActive ? ( + + ) : ( + + )} + + + {isConnected ? uri : connectionLabels.notConnected} +
+ ); +}; + +export default DatabaseStatusIcon; diff --git a/frontend/src/components/UI/IconButtonToolTip.tsx b/frontend/src/components/UI/IconButtonToolTip.tsx index 62f438005..160c77b87 100644 --- a/frontend/src/components/UI/IconButtonToolTip.tsx +++ b/frontend/src/components/UI/IconButtonToolTip.tsx @@ -1,6 +1,6 @@ import { IconButton, Tip } from '@neo4j-ndl/react'; -const IconButtonWithToolTip = ({ +export const IconButtonWithToolTip = ({ text, children, onClick, @@ -42,4 +42,27 @@ const IconButtonWithToolTip = ({ ); }; -export default IconButtonWithToolTip; +export const IconWithToolTip = ({ + text, + children, + placement = 'bottom', +}: { + label: string; + text: string | React.ReactNode; + children: React.ReactNode; + onClick?: React.MouseEventHandler | undefined; + size?: 'small' | 'medium' | 'large'; + clean?: boolean; + grouped?: boolean; + placement?: 'bottom' | 'top' | 'right' | 'left'; + disabled?: boolean; +}) => { + return ( + + {children} + + {text} + + + ); +}; diff --git a/frontend/src/components/WebSources/GenericSourceButton.tsx b/frontend/src/components/WebSources/GenericSourceButton.tsx index 65c8b0046..96aab02ff 100644 --- a/frontend/src/components/WebSources/GenericSourceButton.tsx +++ b/frontend/src/components/WebSources/GenericSourceButton.tsx @@ -1,6 +1,6 @@ import { DataComponentProps } from '../../types'; import { Flex, Typography } from '@neo4j-ndl/react'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; import { APP_SOURCES } from '../../utils/Constants'; import WebButton from '../DataSources/Web/WebButton'; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 5bf076d10..86e54a794 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -648,3 +648,9 @@ export interface MessageContextType { messages: Messages[] | []; setMessages: Dispatch>; } + +export interface 
DatabaseStatusProps { + isConnected: boolean; + isGdsActive: boolean; + uri: string | null; +} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 47f1e119f..7382d3baa 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -236,3 +236,11 @@ export const graphLabels = { }; export const RESULT_STEP_SIZE = 25; + +export const connectionLabels = { + notConnected: 'Not Connected', + graphDataScience: 'Graph Data Science', + graphDatabase: 'Graph Database', + greenStroke: 'green', + redStroke: 'red', +}; From 0c278de458c4d09bce1dc4ac277354a0c21137d3 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 6 Sep 2024 21:58:44 +0530 Subject: [PATCH 060/292] Add communities check and show respective chat modes (#729) * DataScience icon addition * added checkbox to create_communities * added conditionall check for community chat modes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> --- .../src/components/ChatBot/ChatModeToggle.tsx | 44 +++++++++++----- frontend/src/components/Content.tsx | 2 +- .../PostProcessingCheckList/index.tsx | 51 ++++++++++--------- frontend/src/context/UserCredentials.tsx | 7 ++- frontend/src/context/UsersFiles.tsx | 1 + frontend/src/types.ts | 2 + frontend/src/utils/Constants.ts | 6 ++- frontend/src/utils/Utils.ts | 7 +++ 8 files changed, 81 insertions(+), 39 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index e82dfea4d..ec219898f 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -4,6 +4,7 @@ import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; import { chatModes } from '../../utils/Constants'; import { capitalize } from '@mui/material'; +import { capitalizeWithPlus } from 
'../../utils/Utils'; export default function ChatModeToggle({ menuAnchor, @@ -18,7 +19,8 @@ export default function ChatModeToggle({ anchorPortal?: boolean; disableBackdrop?: boolean; }) { - const { setchatMode, chatMode } = useFileContext(); + const { setchatMode, chatMode, postProcessingTasks } = useFileContext(); + const isCommunityAllowed = postProcessingTasks.includes('create_communities'); return ( - chatModes?.map((m) => { + items={useMemo(() => { + if (isCommunityAllowed) { + return chatModes?.map((m) => { return { - title: m.includes('+') - ? m - .split('+') - .map((s) => capitalize(s)) - .join('+') - : capitalize(m), + title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), onClick: () => { setchatMode(m); }, @@ -51,9 +48,30 @@ export default function ChatModeToggle({ ), }; - }), - [chatMode, chatModes] - )} + }); + } + return chatModes + ?.filter((s) => !s.includes('community')) + ?.map((m) => { + return { + title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), + onClick: () => { + setchatMode(m); + }, + disabledCondition: false, + description: ( + + {chatMode === m && ( + <> + Selected + + )} + + ), + }; + }); + + }, [chatMode, chatModes,isCommunityAllowed])} > ); } diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 8f42202e4..95f632d38 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -59,7 +59,7 @@ const Content: React.FC = ({ const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); const [connectionStatus, setConnectionStatus] = useState(false); - const { setUserCredentials, userCredentials } = useCredentials(); + const { setUserCredentials, userCredentials, isGdsActive } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); diff --git 
a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 06e76ff8e..57ebe90c4 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -3,11 +3,12 @@ import { POST_PROCESSING_JOBS } from '../../../../utils/Constants'; import { capitalize } from '../../../../utils/Utils'; import { useFileContext } from '../../../../context/UsersFiles'; import { tokens } from '@neo4j-ndl/base'; - +import { useCredentials } from '../../../../context/UserCredentials'; export default function PostProcessingCheckList() { const { breakpoints } = tokens; const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const { postProcessingTasks, setPostProcessingTasks } = useFileContext(); + const { isGdsActive } = useCredentials(); return (
@@ -18,29 +19,33 @@ export default function PostProcessingCheckList() { - {POST_PROCESSING_JOBS.map((job, idx) => ( - - - {job.title - .split('_') - .map((s) => capitalize(s)) - .join(' ')} - - } - checked={postProcessingTasks.includes(job.title)} - onChange={(e) => { - if (e.target.checked) { - setPostProcessingTasks((prev) => [...prev, job.title]); - } else { - setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); + {POST_PROCESSING_JOBS.map((job, idx) => { + const isCreateCommunities = job.title === 'create_communities'; + return ( + + + {job.title + .split('_') + .map((s) => capitalize(s)) + .join(' ')} + } - }} - > - {job.description} - - ))} + checked={postProcessingTasks.includes(job.title)} + onChange={(e) => { + if (e.target.checked) { + setPostProcessingTasks((prev) => [...prev, job.title]); + } else { + setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); + } + }} + disabled={isCreateCommunities && !isGdsActive} // Disable if GDS is not active + /> + {job.description} + + ); + })}
diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index 20ed75ae5..af59e41f4 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -1,4 +1,4 @@ -import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react'; +import { createContext, useState, useContext, FunctionComponent, ReactNode, useReducer } from 'react'; import { ContextProps, UserCredentials } from '../types'; type Props = { @@ -8,6 +8,8 @@ type Props = { export const UserConnection = createContext({ userCredentials: null, setUserCredentials: () => null, + isGdsActive: null, + setGdsActive: () => null, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -15,9 +17,12 @@ export const useCredentials = () => { }; const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); + const [isGdsActive, setGdsActive] = useReducer((s) => !s, false); const value = { userCredentials, setUserCredentials, + isGdsActive, + setGdsActive, }; return {props.children}; }; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index fd3531403..df805504e 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -68,6 +68,7 @@ const FileContextProvider: FC = ({ children }) => { 'materialize_text_chunk_similarities', 'enable_hybrid_search_and_fulltext_search_in_bloom', 'materialize_entity_similarities', + 'create_communities', ]); const [processedCount, setProcessedCount] = useState(0); useEffect(() => { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 86e54a794..0cd584f5d 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -643,6 +643,8 @@ export interface DrawerChatbotProps { export interface ContextProps { userCredentials: UserCredentials | null; setUserCredentials: (UserCredentials: UserCredentials) => 
void; + isGdsActive: boolean | null; + setGdsActive: () => void; } export interface MessageContextType { messages: Messages[] | []; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 7382d3baa..d3fef33fe 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext']; + : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; @@ -184,6 +184,10 @@ export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ semantic meaning. This facilitates tasks like clustering similar entities, identifying duplicates, and performing similarity-based searches.`, }, + { + title: 'create_communities', + description: 'Create Communities identifies and groups similar entities, improving search accuracy and analysis.', + }, ]; export const batchSize: number = parseInt(process.env.VITE_BATCH_SIZE ?? 
'2'); diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 0b8b05841..319322d24 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -245,3 +245,10 @@ export const sortAlphabetically = (a: Relationship, b: Relationship) => { const captionTwo = b.caption?.toLowerCase() || ''; return captionOne.localeCompare(captionTwo); }; + +export const capitalizeWithPlus = (s: string) => { + return s + .split('+') + .map((s) => capitalize(s)) + .join('+'); +}; From 7ff05cc10e59e1d6cadf3505534928fd15b68502 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 9 Sep 2024 14:24:07 +0530 Subject: [PATCH 061/292] youtube transcript issue (#736) * Implement GoogleApiYoutubeLoader for youtube transcript * Url encoding issue fixed for youtube URL * Get transcript using YouTubeTranscriptApi --- backend/src/document_sources/youtube.py | 36 +++++++++++++++++++------ backend/src/main.py | 15 ++++++++++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index df8ad9a92..fce62774d 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -1,3 +1,5 @@ +from pathlib import Path +from langchain.docstore.document import Document from langchain_community.document_loaders import YoutubeLoader from pytube import YouTube from youtube_transcript_api import YouTubeTranscriptApi @@ -8,6 +10,9 @@ from langchain_community.document_loaders.youtube import TranscriptFormat from src.shared.constants import YOUTUBE_CHUNK_SIZE_SECONDS from typing import List, Dict, Any +import os +import re +from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader def get_youtube_transcript(youtube_id): try: @@ -55,14 +60,29 @@ def create_youtube_url(url): def get_documents_from_youtube(url): try: - youtube_loader = YoutubeLoader.from_youtube_url(url, - 
language=["en-US", "en-gb", "en-ca", "en-au","zh-CN", "zh-Hans", "zh-TW", "fr-FR","de-DE","it-IT","ja-JP","pt-BR","ru-RU","es-ES"], - translation = "en", - add_video_info=True, - transcript_format=TranscriptFormat.CHUNKS, - chunk_size_seconds=YOUTUBE_CHUNK_SIZE_SECONDS) - pages = youtube_loader.load() - file_name = YouTube(url).title + match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',url) + # youtube_loader = YoutubeLoader.from_youtube_url(url, + # language=["en-US", "en-gb", "en-ca", "en-au","zh-CN", "zh-Hans", "zh-TW", "fr-FR","de-DE","it-IT","ja-JP","pt-BR","ru-RU","es-ES"], + # translation = "en", + # add_video_info=True, + # transcript_format=TranscriptFormat.CHUNKS, + # chunk_size_seconds=YOUTUBE_CHUNK_SIZE_SECONDS) + # video_id = parse_qs(urlparse(url).query).get('v') + # cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") + # print(f'Credential file path on youtube.py {cred_path}') + # google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) + # youtube_loader_channel = GoogleApiYoutubeLoader( + # google_api_client=google_api_client, + # video_ids=[video_id[0].strip()], add_video_info=True + # ) + # youtube_transcript = youtube_loader_channel.load() + # pages = youtube_loader.load() + # print(f'youtube page_content: {youtube_transcript[0].page_content}') + # print(f'youtube id: {youtube_transcript[0].metadata["id"]}') + # print(f'youtube title: {youtube_transcript[0].metadata["snippet"]["title"]}') + transcript= get_youtube_combined_transcript(match.group(1)) + file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] + pages = [Document(page_content=transcript)] return file_name, pages except Exception as e: error_message = str(e) diff --git a/backend/src/main.py b/backend/src/main.py index 5be8286fc..85e0649f7 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -7,6 +7,7 @@ START_FROM_LAST_PROCESSED_POSITION, DELETE_ENTITIES_AND_START_FROM_BEGINNING) from src.shared.schema_extraction 
import schema_extraction_from_text +from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader from dotenv import load_dotenv from datetime import datetime import logging @@ -140,8 +141,20 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) logging.info(f"match value: {match}") - obj_source_node.file_name = YouTube(obj_source_node.url).title + cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") + print(f'Credential file path {cred_path}') + video_id = parse_qs(urlparse(youtube_url).query).get('v') + print(f'Video Id Youtube: {video_id}') + # google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) + # youtube_loader_channel = GoogleApiYoutubeLoader( + # google_api_client=google_api_client, + # video_ids=[video_id[0].strip()], add_video_info=True + # ) + # youtube_transcript = youtube_loader_channel.load() + + obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] transcript= get_youtube_combined_transcript(match.group(1)) + # print(transcript) if transcript==None or len(transcript)==0: message = f"Youtube transcript is not available for : {obj_source_node.file_name}" raise Exception(message) From 13c7bc839ba945454812bf29d88886d68428330d Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Mon, 9 Sep 2024 14:25:08 +0000 Subject: [PATCH 062/292] added chnages to graph schema --- backend/test_integrationqa.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 821cc6b5c..b5a50de60 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -65,9 +65,9 @@ def test_graph_from_file_local(model_name): def test_graph_from_wikipedia(model_name): """Test 
graph creation from a Wikipedia page.""" - wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + wiki_query = 'https://en.wikipedia.org/wiki/Google_DeepMind' source_type = 'Wikipedia' - file_name = "Ram_Mandir" + file_name = "Google_DeepMind" create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '') @@ -202,7 +202,7 @@ def test_populate_graph_schema_from_text(model): def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5', 'openai-gpt-4o'] + models = ['openai-gpt-4o','gemini-1.5-pro'] for model_name in models: try: @@ -210,10 +210,10 @@ def run_tests(): final_list.append(test_graph_from_wikipedia(model_name)) final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_website(model_name)) - final_list.append(test_graph_from_youtube_video(model_name)) - final_list.append(test_chatbot_qna(model_name)) - final_list.append(test_chatbot_qna(model_name, mode='vector')) - final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + # final_list.append(test_graph_from_youtube_video(model_name)) + # final_list.append(test_chatbot_qna(model_name)) + # final_list.append(test_chatbot_qna(model_name, mode='vector')) + # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) except Exception as e: error_list.append((model_name, str(e))) # #Compare and log diffrences in graph results From a12b7cd8f9089f13810a50f4859631edf5ae7b2e Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 10 Sep 2024 16:52:04 +0000 Subject: [PATCH 063/292] added local search --- backend/score.py | 14 +++--- backend/src/QA_integration.py | 21 ++++---- backend/src/shared/constants.py | 88 ++++++++++++++++++--------------- 3 files changed, 66 insertions(+), 57 deletions(-) diff --git 
a/backend/score.py b/backend/score.py index 329c8f5b2..6fb245748 100644 --- a/backend/score.py +++ b/backend/score.py @@ -242,13 +242,6 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) - if "create_communities" in tasks: - model = "openai-gpt-4o" - await asyncio.to_thread(create_communities, uri, userName, password, database,model) - josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) - logging.info(f'created communities') - if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} @@ -266,6 +259,13 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) logging.info(f'Entity Embeddings created') + + if "create_communities" in tasks: + model = "openai-gpt-4o" + await asyncio.to_thread(create_communities, uri, userName, password, database,model) + josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(josn_obj) + logging.info(f'created communities') return create_api_response('Success', message='All tasks completed successfully') except Exception as e: diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 72c1232fa..bdde16a0b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -174,7 +174,7 @@ def format_documents(documents, model): return "\n\n".join(formatted_docs), 
sources -def process_documents(docs, question, messages, llm, model): +def process_documents(docs, question, messages, llm, model,chat_mode_settings): start_time = time.time() try: @@ -187,8 +187,10 @@ def process_documents(docs, question, messages, llm, model): "context": formatted_docs, "input": question }) - - result = get_sources_and_chunks(sources, docs) + if chat_mode_settings == "local_community_search": + result = {"sources": [], "chunkdetails": []} + else: + result = get_sources_and_chunks(sources, docs) content = ai_response.content total_tokens = get_total_tokens(ai_response, llm) @@ -315,9 +317,10 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_ def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - + neo_db = initialize_neo4j_vector(graph, chat_mode_settings) document_names= list(map(str.strip, json.loads(document_names))) + search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "local_community_search" else CHAT_SEARCH_KWARG_K retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: @@ -328,8 +331,7 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_ def setup_chat(model, graph, document_names, chat_mode_settings): start_time = time.time() try: - if model == "diffbot": - model = "openai-gpt-4o" + model = "openai-gpt-4o" if model == "diffbot" else model llm, model_name = get_llm(model=model) logging.info(f"Model called in chat: {model} (version: {model_name})") @@ -351,9 +353,9 @@ def process_chat_response(messages,history, question, model, graph, document_nam llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) docs = retrieve_documents(doc_retriever, messages) - + if docs: - content, result, total_tokens = process_documents(docs, question, messages, 
llm, model) + content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings) else: content = "I couldn't find any relevant documents to answer your question." result = {"sources": [], "chunkdetails": []} @@ -449,7 +451,6 @@ def create_graph_chain(model, graph): except Exception as e: logging.error(f"An error occurred while creating the GraphCypherQAChain instance. : {e}") - def get_graph_response(graph_chain, question): try: cypher_res = graph_chain.invoke({"query": question}) @@ -471,7 +472,7 @@ def get_graph_response(graph_chain, question): } except Exception as e: - logging.error("An error occurred while getting the graph response : {e}") + logging.error(f"An error occurred while getting the graph response : {e}") def process_graph_response(model, graph, question, messages, history): try: diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c15602e25..e83f6c8e5 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -58,7 +58,7 @@ ## CHAT SETUP CHAT_MAX_TOKENS = 1000 -CHAT_SEARCH_KWARG_K = 3 +CHAT_SEARCH_KWARG_K = 10 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 CHAT_DOC_SPLIT_SIZE = 3000 CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 @@ -68,6 +68,13 @@ ("ollama_llama3") : 2 } + +CHAT_TOKEN_CUT_OFF = { + ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, + ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, + ("ollama_llama3") : 2 +} + ### CHAT TEMPLATES CHAT_SYSTEM_TEMPLATE = """ You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources. @@ -111,7 +118,6 @@ Note: This system does not generate answers based solely on internal knowledge. 
It answers from the information provided in the user's current and previous inputs, and from the context. """ - QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." ## CHAT QUERIES @@ -195,59 +201,59 @@ ### Local community search +LOCAL_COMMUNITY_TOP_K = 10 LOCAL_COMMUNITY_TOP_CHUNKS = 3 LOCAL_COMMUNITY_TOP_COMMUNITIES = 3 LOCAL_COMMUNITY_TOP_OUTSIDE_RELS = 10 -LOCAL_COMMUNITY_TOP_INSIDE_RELS = 10 -LOCAL_COMMUNITY_TOP_ENTITIES = 10 LOCAL_COMMUNITY_SEARCH_QUERY = """ -WITH collect(node) as nodes -WITH -collect { +WITH collect(node) as nodes, avg(score) as score +WITH score, +collect {{ UNWIND nodes as n MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) WITH c, count(distinct n) as freq RETURN c.text AS chunkText ORDER BY freq DESC - LIMIT $topChunks -} AS text_mapping, -// Entity - Report Mapping -collect { + LIMIT {topChunks} +}} AS text_mapping, +collect {{ UNWIND nodes as n MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) WITH c, c.rank as rank, c.weight AS weight RETURN c.summary ORDER BY rank, weight DESC - LIMIT $topCommunities -} AS report_mapping, -// Outside Relationships -collect { - UNWIND nodes as n - MATCH (n)-[r:RELATED]-(m) - WHERE NOT m IN nodes - RETURN r.description AS descriptionText - ORDER BY r.rank, r.weight DESC - LIMIT $topOutsideRels -} as outsideRels, -// Inside Relationships -collect { - UNWIND nodes as n - MATCH (n)-[r:RELATED]-(m) - WHERE m IN nodes - RETURN r.description AS descriptionText - ORDER BY r.rank, r.weight DESC - LIMIT $topInsideRels -} as insideRels, -// Entities description -collect { + LIMIT {topCommunities} +}} AS communities, +collect {{ UNWIND nodes as n - RETURN n.description AS descriptionText -} as entities -// We don't have covariates or claims here -RETURN {Chunks: text_mapping, Reports: report_mapping, - Relationships: outsideRels + insideRels, - Entities: entities} AS text, 1.0 AS score, 
{} AS metadata + RETURN n.id + " " + coalesce(n.description, "") AS descriptionText +}} as entities, +collect {{ + unwind nodes as n + unwind nodes as m + match path = (n)-[r]->(m) + WITH n,r,m + // ORDER BY r.score DESC LIMIT 10 // doesn't exist in ours + WITH distinct r + RETURN startNode(r).id+" "+type(r)+" "+endNode(r).id as relText + // todo should we have a limit here? +}} as rels, +collect {{ + unwind nodes as n + match path = (n)-[r]-(m:__Entity__) + where not m in nodes + with m, collect(distinct r) as rels, count(*) as freq + ORDER BY freq DESC LIMIT {topOutsideRels} + WITH collect(m) as outsideNodes, apoc.coll.flatten(collect(rels)) as rels + RETURN {{nodes: [n in outsideNodes | n.id + " " + coalesce(n.description, "")], + rels: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id]}} +}} as outsideRels + +RETURN {{chunks: text_mapping, communities: communities, + entities: entities, + relationships: rels, + outside: outsideRels}} AS text, score, {{}} AS metadata ; """ CHAT_MODE_CONFIG_MAP= { @@ -264,7 +270,9 @@ "document_filter": False }, "local_community_search": { - "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY, + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS), "index_name": "entity_vector", "keyword_index": None, "document_filter": False From 046913863522591b691ac0036c40df4286a66c78 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:06:15 +0530 Subject: [PATCH 064/292] 725 add checkbox for create communities (#728) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditionall check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query --------- 
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/src/shared/constants.py | 17 ++++-- .../src/components/ChatBot/ChatModeToggle.tsx | 54 +++++++++---------- frontend/src/components/Content.tsx | 10 ++-- .../ConnectionModal/ConnectionModal.tsx | 5 +- .../Deduplication/index.tsx | 4 +- .../PostProcessingCheckList/index.tsx | 8 ++- .../src/components/UI/DatabaseStatusIcon.tsx | 26 +++++++++ .../src/components/UI/ScienceMolecule.tsx | 40 ++++++++++++++ frontend/src/context/UserCredentials.tsx | 6 +-- frontend/src/types.ts | 4 +- 10 files changed, 129 insertions(+), 45 deletions(-) create mode 100644 frontend/src/components/UI/DatabaseStatusIcon.tsx create mode 100644 frontend/src/components/UI/ScienceMolecule.tsx diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c5f8e98a4..a46d1f906 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -38,15 +38,26 @@ CALL {{ WITH selectedChunks UNWIND selectedChunks as c - + OPTIONAL MATCH entities=(c:Chunk)-[:HAS_ENTITY]->(e) OPTIONAL MATCH entityRels=(e)--(e2:!Chunk) WHERE exists {{ (e2)<-[:HAS_ENTITY]-(other) WHERE other IN selectedChunks }} - RETURN collect(entities) as entities, collect(entityRels) as entityRels + RETURN entities , entityRels, collect(DISTINCT e) as entity +}} +WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels,entity + +WITH * + +CALL {{ + with entity + unwind entity as n + OPTIONAL MATCH community=(n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) + OPTIONAL MATCH parentcommunity=(p)-[:PARENT_COMMUNITY]->(p2:__Community__) + return community,parentcommunity }} -WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels, true) as paths +WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + community+ parentcommunity, true) as paths 
// distinct nodes and rels CALL {{ WITH paths UNWIND paths AS path UNWIND nodes(path) as node WITH distinct node diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index ec219898f..754a791a4 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -5,7 +5,7 @@ import CustomMenu from '../UI/Menu'; import { chatModes } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; - +import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, closeHandler = () => {}, @@ -21,7 +21,8 @@ export default function ChatModeToggle({ }) { const { setchatMode, chatMode, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('create_communities'); - + const { isGdsActive } = useCredentials(); + return ( { - if (isCommunityAllowed) { + if (isGdsActive && isCommunityAllowed) { return chatModes?.map((m) => { return { title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), @@ -49,29 +50,28 @@ export default function ChatModeToggle({ ), }; }); - } - return chatModes - ?.filter((s) => !s.includes('community')) - ?.map((m) => { - return { - title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), - onClick: () => { - setchatMode(m); - }, - disabledCondition: false, - description: ( - - {chatMode === m && ( - <> - Selected - - )} - - ), - }; - }); - - }, [chatMode, chatModes,isCommunityAllowed])} - > + } + return chatModes + ?.filter((s) => !s.includes('community')) + ?.map((m) => { + return { + title: m.includes('+') ? 
capitalizeWithPlus(m) : capitalize(m), + onClick: () => { + setchatMode(m); + }, + disabledCondition: false, + description: ( + + {chatMode === m && ( + <> + Selected + + )} + + ), + }; + }); + }, [chatMode, chatModes, isCommunityAllowed, isGdsActive])} + /> ); } diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 95f632d38..a9e66f709 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -32,7 +32,7 @@ import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; import axios from 'axios'; -import DatabaseStatusIcon from './UI/DatabaseIcon'; +import DatabaseStatusIcon from './UI/DatabaseStatusIcon'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -59,7 +59,7 @@ const Content: React.FC = ({ const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); const [connectionStatus, setConnectionStatus] = useState(false); - const { setUserCredentials, userCredentials, isGdsActive } = useCredentials(); + const { setUserCredentials, userCredentials, isGdsActive, setGdsActive } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); @@ -87,7 +87,6 @@ const Content: React.FC = ({ alertType: 'error', alertMessage: '', }); - const isGdsActive = true; const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { setalertDetails({ @@ -128,6 +127,9 @@ const Content: React.FC = ({ database: neo4jConnection.database, port: neo4jConnection.uri.split(':')[2], }); + if (neo4jConnection.isgdsActive !== undefined) { + setGdsActive(neo4jConnection.isgdsActive); + } } else { 
setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } @@ -653,8 +655,6 @@ const Content: React.FC = ({ } }; - console.log('isGds', isGdsActive); - return ( <> {alertDetails.showAlert && ( diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index ab9fe2399..f39f392ec 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -41,7 +41,7 @@ export default function ConnectionModal({ const [username, setUsername] = useState(initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); @@ -201,6 +201,8 @@ export default function ConnectionModal({ if (response?.data?.status !== 'Success') { throw new Error(response.data.error); } else { + const isgdsActive = response.data.data.gds_status; + setGdsActive(isgdsActive); localStorage.setItem( 'neo4j.connection', JSON.stringify({ @@ -209,6 +211,7 @@ export default function ConnectionModal({ password: password, database: database, userDbVectorIndex, + isgdsActive, }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index f5a021e30..36fcff3d1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 57ebe90c4..0051fefbf 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -32,7 +32,11 @@ export default function PostProcessingCheckList() { .join(' ')} } - checked={postProcessingTasks.includes(job.title)} + checked={ + isCreateCommunities + ? 
isGdsActive && postProcessingTasks.includes(job.title) + : postProcessingTasks.includes(job.title) + } onChange={(e) => { if (e.target.checked) { setPostProcessingTasks((prev) => [...prev, job.title]); @@ -40,7 +44,7 @@ export default function PostProcessingCheckList() { setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); } }} - disabled={isCreateCommunities && !isGdsActive} // Disable if GDS is not active + disabled={isCreateCommunities && !isGdsActive} /> {job.description}
diff --git a/frontend/src/components/UI/DatabaseStatusIcon.tsx b/frontend/src/components/UI/DatabaseStatusIcon.tsx new file mode 100644 index 000000000..7b159607e --- /dev/null +++ b/frontend/src/components/UI/DatabaseStatusIcon.tsx @@ -0,0 +1,26 @@ +import { CircleStackIconOutline } from '@neo4j-ndl/react/icons'; +import { IconWithToolTip } from './IconButtonToolTip'; +import { DatabaseStatusProps } from '../../types'; +import { connectionLabels } from '../../utils/Constants'; +import ScienceMoleculeIcon from '../UI/ScienceMolecule'; + +const DatabaseStatusIcon: React.FC = ({ isConnected, isGdsActive, uri }) => { + const strokeColour = isConnected ? connectionLabels.greenStroke : connectionLabels.redStroke; + const text = isGdsActive ? connectionLabels.graphDataScience : connectionLabels.graphDatabase; + return ( +
+ + + {isGdsActive ? ( + + ) : ( + + )} + + + {isConnected ? uri : connectionLabels.notConnected} +
+ ); +}; + +export default DatabaseStatusIcon; diff --git a/frontend/src/components/UI/ScienceMolecule.tsx b/frontend/src/components/UI/ScienceMolecule.tsx new file mode 100644 index 000000000..6cd046f14 --- /dev/null +++ b/frontend/src/components/UI/ScienceMolecule.tsx @@ -0,0 +1,40 @@ +import React from 'react'; + +interface ScienceMoleculeIconProps { + currentColour: string; +} + +const ScienceMoleculeIcon: React.FC = ({ currentColour }) => ( + + + + + +); + +export default ScienceMoleculeIcon; diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index af59e41f4..14d5b87de 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -8,8 +8,8 @@ type Props = { export const UserConnection = createContext({ userCredentials: null, setUserCredentials: () => null, - isGdsActive: null, - setGdsActive: () => null, + isGdsActive: false, + setGdsActive: () => false, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -17,7 +17,7 @@ export const useCredentials = () => { }; const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); - const [isGdsActive, setGdsActive] = useReducer((s) => !s, false); + const [isGdsActive, setGdsActive] = useState(false); const value = { userCredentials, setUserCredentials, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 0cd584f5d..4e5a03d77 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -643,8 +643,8 @@ export interface DrawerChatbotProps { export interface ContextProps { userCredentials: UserCredentials | null; setUserCredentials: (UserCredentials: UserCredentials) => void; - isGdsActive: boolean | null; - setGdsActive: () => void; + isGdsActive: boolean; + setGdsActive: Dispatch>; } export interface MessageContextType { messages: Messages[] | []; From 177f6c8d345f110396796240cf885a86e173ff37 Mon Sep 17 
00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:06:19 +0000 Subject: [PATCH 065/292] modified local search query --- backend/src/QA_integration.py | 30 +++++++++++++--------- backend/src/shared/constants.py | 44 +++++++++++++++------------------ 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index bdde16a0b..ecea7b50b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -117,11 +117,11 @@ def get_sources_and_chunks(sources_used, docs): result = { 'sources': sources_used, - 'chunkdetails': chunkdetails_list + 'chunkdetails': chunkdetails_list, + "entities" : list() } return result - def get_rag_chain(llm, system_template=CHAT_SYSTEM_TEMPLATE): try: question_answering_prompt = ChatPromptTemplate.from_messages( @@ -155,12 +155,15 @@ def format_documents(documents, model): formatted_docs = [] sources = set() + lc_entities = {} for doc in sorted_documents: try: - source = doc.metadata.get('source', 'unknown') + source = doc.metadata.get('source', "unknown") sources.add(source) - + + lc_entities = doc.metadata if 'entities'in doc.metadata.keys() else lc_entities + formatted_doc = ( "Document start\n" f"This Document belongs to the source {source}\n" @@ -172,13 +175,13 @@ def format_documents(documents, model): except Exception as e: logging.error(f"Error formatting document: {e}") - return "\n\n".join(formatted_docs), sources + return "\n\n".join(formatted_docs), sources,lc_entities def process_documents(docs, question, messages, llm, model,chat_mode_settings): start_time = time.time() try: - formatted_docs, sources = format_documents(docs, model) + formatted_docs, sources,lc_entities = format_documents(docs, model) rag_chain = get_rag_chain(llm=llm) @@ -187,8 +190,10 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings): "context": formatted_docs, "input": question 
}) - if chat_mode_settings == "local_community_search": - result = {"sources": [], "chunkdetails": []} + if chat_mode_settings["mode"] == "local_community_search": + result = {'sources': list(), + 'chunkdetails': list()} + result.update(lc_entities) else: result = get_sources_and_chunks(sources, docs) content = ai_response.content @@ -352,8 +357,7 @@ def process_chat_response(messages,history, question, model, graph, document_nam try: llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) - docs = retrieve_documents(doc_retriever, messages) - + docs = retrieve_documents(doc_retriever, messages) if docs: content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings) else: @@ -374,7 +378,8 @@ def process_chat_response(messages,history, question, model, graph, document_nam "chunkdetails": result["chunkdetails"], "total_tokens": total_tokens, "response_time": 0, - "mode": chat_mode_settings["mode"] + "mode": chat_mode_settings["mode"], + "entities" : result["entities"] }, "user": "chatbot" } @@ -390,7 +395,8 @@ def process_chat_response(messages,history, question, model, graph, document_nam "total_tokens": 0, "response_time": 0, "error": f"{type(e).__name__}: {str(e)}", - "mode": chat_mode_settings["mode"] + "mode": chat_mode_settings["mode"], + "entities": [] }, "user": "chatbot" } diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index e83f6c8e5..f0e9531ef 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -207,36 +207,29 @@ LOCAL_COMMUNITY_TOP_OUTSIDE_RELS = 10 LOCAL_COMMUNITY_SEARCH_QUERY = """ -WITH collect(node) as nodes, avg(score) as score -WITH score, +WITH collect(node) as nodes, avg(score) as score, collect({{id: elementId(node), score:score}}) as metadata +WITH score, nodes,metadata, collect {{ UNWIND nodes as n MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) WITH c, count(distinct n) as freq - RETURN c.text AS chunkText + 
RETURN c ORDER BY freq DESC LIMIT {topChunks} -}} AS text_mapping, +}} AS chunks, collect {{ UNWIND nodes as n MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) WITH c, c.rank as rank, c.weight AS weight - RETURN c.summary + RETURN c ORDER BY rank, weight DESC LIMIT {topCommunities} }} AS communities, -collect {{ - UNWIND nodes as n - RETURN n.id + " " + coalesce(n.description, "") AS descriptionText -}} as entities, collect {{ unwind nodes as n unwind nodes as m - match path = (n)-[r]->(m) - WITH n,r,m - // ORDER BY r.score DESC LIMIT 10 // doesn't exist in ours - WITH distinct r - RETURN startNode(r).id+" "+type(r)+" "+endNode(r).id as relText + match (n)-[r]->(m) + RETURN distinct r // todo should we have a limit here? }} as rels, collect {{ @@ -246,14 +239,17 @@ with m, collect(distinct r) as rels, count(*) as freq ORDER BY freq DESC LIMIT {topOutsideRels} WITH collect(m) as outsideNodes, apoc.coll.flatten(collect(rels)) as rels - RETURN {{nodes: [n in outsideNodes | n.id + " " + coalesce(n.description, "")], - rels: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id]}} -}} as outsideRels - -RETURN {{chunks: text_mapping, communities: communities, - entities: entities, - relationships: rels, - outside: outsideRels}} AS text, score, {{}} AS metadata ; + RETURN {{ nodes: outsideNodes, rels: rels }} +}} as outside + +RETURN {{chunks: [c in chunks | c.text], + communities: [c in communities | c.summary], + entities: [n in nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+ n.id + " " + coalesce(n.description, "")], + relationships: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id], + outside: {{ + nodes: [n in outside[0].nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+n.id + " " + coalesce(n.description, "")], + relationships: [r in outside[0].rels | apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+startNode(r).id+" "+type(r)+" "+apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + 
":"+endNode(r).id]}} + }} AS text, score, {{entities:metadata}} as metadata """ CHAT_MODE_CONFIG_MAP= { @@ -271,8 +267,8 @@ }, "local_community_search": { "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, - topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, - topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS), + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS), "index_name": "entity_vector", "keyword_index": None, "document_filter": False From 36c9dd3651f6d8823e1d78e7304004b5f1894261 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:25:38 +0000 Subject: [PATCH 066/292] added entity details for chat --- backend/score.py | 4 +- backend/src/chunkid_entities.py | 86 ++++++++++++++++++++------------- backend/src/graph_query.py | 6 +-- backend/src/shared/constants.py | 39 ++++++++++++++- 4 files changed, 95 insertions(+), 40 deletions(-) diff --git a/backend/score.py b/backend/score.py index 6fb245748..ad8ab638e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -306,10 +306,10 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids=Form(None)): +async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()): try: logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) + result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=is_entity) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': 
formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj) return create_api_response('Success',data=result) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index cdbd358d5..1e7ea197a 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -1,24 +1,6 @@ import logging -from neo4j import graph from src.graph_query import * - -CHUNK_QUERY = """ -match (chunk:Chunk) where chunk.id IN $chunksIds - -MATCH (chunk)-[:PART_OF]->(d:Document) -CALL {WITH chunk -MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, - [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, - endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, - relationship: {type:type(r), element_id:elementId(r)}}] as entities -""" - +from src.shared.constants import * def process_records(records): """ @@ -86,8 +68,31 @@ def process_chunk_data(chunk_data): return chunk_properties except Exception as e: logging.error(f"chunkid_entities module: An error occurred while extracting the Chunk text from records: {e}") - -def get_entities_from_chunkids(uri, username, password, chunk_ids): + +def process_chunkids(driver,chunk_ids): + logging.info(f"Starting graph query process for chunk ids") + chunk_ids_list = chunk_ids.split(",") + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + result = process_records(records) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = process_chunk_data(records) + 
logging.info(f"Query process completed successfully for chunk ids") + return result + +def process_entityids(driver,chunk_ids): + logging.info(f"Starting graph query process for entity ids") + entity_ids_list = chunk_ids.split(",") + query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS,topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES,topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS) + query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX+ query_body +LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX + records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) + result = process_records(records) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = process_chunk_data(records) + result["community_data"] = records["communities"] + logging.info(f"Query process completed successfully for chunk ids") + return result + +def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False): """ Retrieve and process nodes and relationships from a graph database given a list of chunk IDs. @@ -101,25 +106,40 @@ def get_entities_from_chunkids(uri, username, password, chunk_ids): dict: A dictionary with 'nodes' and 'relationships' keys containing processed data, or an error message. 
""" try: - logging.info(f"Starting graph query process for chunk ids") - if chunk_ids: - chunk_ids_list = chunk_ids.split(",") - driver = get_graphDB_driver(uri, username, password) - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) - result = process_records(records) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = process_chunk_data(records) - logging.info(f"Query process completed successfully for chunk ids") + + driver = get_graphDB_driver(uri, username, password,database) + if not is_entity: + if chunk_ids: + result = process_chunkids(driver,chunk_ids) + else: + logging.info(f"chunkid_entities module: No chunk ids are passed") + result = { + "nodes": [], + "relationships": [], + "chunk_data":[] + } return result + if chunk_ids: + result = process_entityids(driver,chunk_ids) else: - logging.info(f"chunkid_entities module: No chunk ids are passed") + logging.info(f"chunkid_entities module: No entity ids are passed") result = { "nodes": [], "relationships": [], - "chunk_data":[] + "chunk_data":[], + "community_data":[] } return result except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") - raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e \ No newline at end of file + raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. 
Please check the logs for more details.") from e + + + +uri = "neo4j+s://demo.neo4jlabs.com" +userName = "persistent" +password = "848345f99c41b3844dc02bb4e6295f3e" +database = "persistent2" +ids = "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:23","4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:224" +get_entities_from_chunkids(uri=uri, username=userName, password=password,database=database,chunk_ids=ids,is_entity=True) \ No newline at end of file diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 5468fe2c3..b98dd1aa9 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -8,7 +8,7 @@ # watch("neo4j") -def get_graphDB_driver(uri, username, password): +def get_graphDB_driver(uri, username, password,database="neo4j"): """ Creates and returns a Neo4j database driver instance configured with the provided credentials. @@ -20,9 +20,9 @@ def get_graphDB_driver(uri, username, password): logging.info(f"Attempting to connect to the Neo4j database at {uri}") enable_user_agent = os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") if enable_user_agent: - driver = GraphDatabase.driver(uri, auth=(username, password), user_agent=os.environ.get('NEO4J_USER_AGENT')) + driver = GraphDatabase.driver(uri, auth=(username, password),database=database, user_agent=os.environ.get('NEO4J_USER_AGENT')) else: - driver = GraphDatabase.driver(uri, auth=(username, password)) + driver = GraphDatabase.driver(uri, auth=(username, password),database=database) logging.info("Connection successful") return driver except Exception as e: diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index f0e9531ef..b1f25998e 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -56,6 +56,23 @@ """ +CHUNK_QUERY = """ +match (chunk:Chunk) where chunk.id IN $chunksIds + +MATCH (chunk)-[:PART_OF]->(d:Document) +CALL {WITH chunk +MATCH (chunk)-[:HAS_ENTITY]->(e) +MATCH 
path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document) +UNWIND rels as r +RETURN collect(distinct r) as rels +} +WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels +RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, + [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, + endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, + relationship: {type:type(r), element_id:elementId(r)}}] as entities +""" + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_K = 10 @@ -199,7 +216,6 @@ RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ - ### Local community search LOCAL_COMMUNITY_TOP_K = 10 LOCAL_COMMUNITY_TOP_CHUNKS = 3 @@ -241,7 +257,10 @@ WITH collect(m) as outsideNodes, apoc.coll.flatten(collect(rels)) as rels RETURN {{ nodes: outsideNodes, rels: rels }} }} as outside +""" + +LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX = """ RETURN {{chunks: [c in chunks | c.text], communities: [c in communities | c.summary], entities: [n in nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+ n.id + " " + coalesce(n.description, "")], @@ -252,6 +271,22 @@ }} AS text, score, {{entities:metadata}} as metadata """ +LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX = """ +UNWIND $entityIds as id +MATCH (node) WHERE elementId(node) = id +WITH node, 1.0 as score +""" + +LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ +RETURN [chunk in chunks | chunk {.*, embedding:null}] as chunks, +[community in communities | community {.*, embedding:null}] as communities, +[node in nodes+outside[0].nodes | node {.*, label:labels(node),embedding:null}] as nodes, +[r in rels+outside[0].rels | 
{startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, + endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, + relationship: {type:type(r), element_id:elementId(r)}}] as entities +""" + + CHAT_MODE_CONFIG_MAP= { "vector": { "retrieval_query": VECTOR_SEARCH_QUERY, @@ -268,7 +303,7 @@ "local_community_search": { "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, - topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS), + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX, "index_name": "entity_vector", "keyword_index": None, "document_filter": False From 1aaeab643d5e03d4fb3b1b270b233f42247de64b Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:29:23 +0000 Subject: [PATCH 067/292] modified chunkids --- backend/src/chunkid_entities.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 1e7ea197a..84b1d3f5f 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -134,12 +134,3 @@ def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_e except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. 
Please check the logs for more details.") from e - - - -uri = "neo4j+s://demo.neo4jlabs.com" -userName = "persistent" -password = "848345f99c41b3844dc02bb4e6295f3e" -database = "persistent2" -ids = "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:23","4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:224" -get_entities_from_chunkids(uri=uri, username=userName, password=password,database=database,chunk_ids=ids,is_entity=True) \ No newline at end of file From ebb4c917106431132c29bb1a881b5259f16f6565 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:25:34 +0000 Subject: [PATCH 068/292] modified chunk_entities --- backend/src/chunkid_entities.py | 84 +++++++++++++++++++++++---------- backend/src/shared/constants.py | 2 +- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 84b1d3f5f..e6ca8a512 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -69,28 +69,64 @@ def process_chunk_data(chunk_data): except Exception as e: logging.error(f"chunkid_entities module: An error occurred while extracting the Chunk text from records: {e}") -def process_chunkids(driver,chunk_ids): - logging.info(f"Starting graph query process for chunk ids") - chunk_ids_list = chunk_ids.split(",") - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) - result = process_records(records) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = process_chunk_data(records) - logging.info(f"Query process completed successfully for chunk ids") - return result - -def process_entityids(driver,chunk_ids): - logging.info(f"Starting graph query process for entity ids") - entity_ids_list = chunk_ids.split(",") - query_body = 
LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS,topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES,topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS) - query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX+ query_body +LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX - records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) - result = process_records(records) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = process_chunk_data(records) - result["community_data"] = records["communities"] - logging.info(f"Query process completed successfully for chunk ids") - return result +def process_chunkids(driver, chunk_ids): + """ + Processes chunk IDs to retrieve chunk data. + """ + try: + logging.info(f"Starting graph query process for chunk ids: {chunk_ids}") + chunk_ids_list = chunk_ids.split(",") + + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + result = process_records(records) + logging.info(f"Nodes and relationships are processed") + + result["chunk_data"] = process_chunk_data(records) + logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing chunk ids: {chunk_ids}. Error: {e}") + raise + +def remove_duplicate_nodes(nodes): + unique_nodes = [] + seen_element_ids = set() + + for node in nodes: + element_id = node['element_id'] + if element_id not in seen_element_ids: + unique_nodes.append(node) + seen_element_ids.add(element_id) + + return unique_nodes + +def process_entityids(driver, chunk_ids): + """ + Processes entity IDs to retrieve local community data. 
+ """ + try: + logging.info(f"Starting graph query process for entity ids: {chunk_ids}") + entity_ids_list = chunk_ids.split(",") + query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format( + topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS + ) + query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX + query_body + LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX + + records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) + + result = process_records(records) + result["nodes"].extend(records[0]["nodes"]) + result["nodes"] = remove_duplicate_nodes(result["nodes"]) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = records[0]["chunks"] + result["community_data"] = records[0]["communities"] + logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing entity ids: {chunk_ids}. Error: {e}") + raise def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False): """ @@ -129,8 +165,8 @@ def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_e "chunk_data":[], "community_data":[] } - return result + return result except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") - raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e + raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. 
Please check the logs for more details.") from e \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b1f25998e..64e1db36f 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -280,7 +280,7 @@ LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ RETURN [chunk in chunks | chunk {.*, embedding:null}] as chunks, [community in communities | community {.*, embedding:null}] as communities, -[node in nodes+outside[0].nodes | node {.*, label:labels(node),embedding:null}] as nodes, +[node in nodes+outside[0].nodes | {element_id:elementId(node), labels:labels(node), properties:{id:node.id,description:node.description}}] as nodes, [r in rels+outside[0].rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, relationship: {type:type(r), element_id:elementId(r)}}] as entities From c72962ca8f8892c5c12d124689b990ed18c665ea Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 11 Sep 2024 23:07:32 +0530 Subject: [PATCH 069/292] Add communities Checkbox to graph viz (#739) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditionall check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query * checkbox addition * filter logic * filter logic missing checks * updated_graph_query * filter logic optimised * gds active check * handle checkbox show --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- 
backend/src/graph_query.py | 7 +- backend/src/shared/constants.py | 9 ++- .../src/components/ChatBot/ChatModeToggle.tsx | 1 - .../components/Graph/CheckboxSelection.tsx | 14 +++- .../src/components/Graph/GraphViewModal.tsx | 50 +++++++----- frontend/src/types.ts | 3 +- frontend/src/utils/Constants.ts | 42 ++++++---- frontend/src/utils/Utils.ts | 77 +++++++++++++++++-- 8 files changed, 148 insertions(+), 55 deletions(-) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 5468fe2c3..8a8576905 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -61,15 +61,18 @@ def process_node(node): with datetime objects formatted as ISO strings. """ try: + labels = set(node.labels) + labels.discard("__Entity__") + node_element = { "element_id": node.element_id, - "labels": list(node.labels), + "labels": list(labels), "properties": {} } # logging.info(f"Processing node with element ID: {node.element_id}") for key in node: - if key in ["embedding", "text"]: + if key in ["embedding", "text", "summary"]: continue value = node.get(key) if isinstance(value, time.DateTime): diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index a46d1f906..8944714f8 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -16,6 +16,7 @@ PROJECT_ID = 'llm-experiments-387609' GRAPH_CHUNK_LIMIT = 50 + #query GRAPH_QUERY = """ MATCH docs = (d:Document) @@ -45,7 +46,7 @@ }} RETURN entities , entityRels, collect(DISTINCT e) as entity }} -WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels,entity +WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels, entity WITH * @@ -53,11 +54,11 @@ with entity unwind entity as n OPTIONAL MATCH community=(n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) - OPTIONAL MATCH parentcommunity=(p)-[:PARENT_COMMUNITY]->(p2:__Community__) - return community,parentcommunity + OPTIONAL MATCH 
parentcommunity=(p)-[:PARENT_COMMUNITY*]->(p2:__Community__) + return collect(community) as communities , collect(parentcommunity) as parentCommunities }} -WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + community+ parentcommunity, true) as paths +WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) as paths // distinct nodes and rels CALL {{ WITH paths UNWIND paths AS path UNWIND nodes(path) as node WITH distinct node diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 754a791a4..6315394af 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -22,7 +22,6 @@ export default function ChatModeToggle({ const { setchatMode, chatMode, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('create_communities'); const { isGdsActive } = useCredentials(); - return ( = ({ graphType, loading, handleChange }) => ( +import { graphLabels } from '../../utils/Constants'; + +const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds }) => (
handleChange('DocumentChunk')} /> handleChange('Entities')} /> + {isgds && ( handleChange('Communities')} + />)}
); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 07f276e3f..8e8cb5c87 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -58,17 +58,17 @@ const GraphViewModal: React.FunctionComponent = ({ const nvlRef = useRef(null); const [nodes, setNodes] = useState([]); const [relationships, setRelationships] = useState([]); - const [graphType, setGraphType] = useState(intitalGraphType); const [allNodes, setAllNodes] = useState([]); const [allRelationships, setAllRelationships] = useState([]); const [loading, setLoading] = useState(false); const [status, setStatus] = useState<'unknown' | 'success' | 'danger'>('unknown'); const [statusMessage, setStatusMessage] = useState(''); - const { userCredentials } = useCredentials(); + const { userCredentials, isGdsActive } = useCredentials(); const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); const debouncedQuery = useDebounce(searchQuery, 300); + const [graphType, setGraphType] = useState(intitalGraphType(isGdsActive)); // the checkbox selection const handleCheckboxChange = (graph: GraphType) => { @@ -97,10 +97,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? 
queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -119,7 +119,7 @@ const GraphViewModal: React.FunctionComponent = ({ if (nvlRef.current) { nvlRef.current?.destroy(); } - setGraphType(intitalGraphType); + setGraphType(intitalGraphType(isGdsActive)); clearTimeout(timeoutId); setScheme({}); setNodes([]); @@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -180,6 +180,7 @@ const GraphViewModal: React.FunctionComponent = ({ useEffect(() => { if (open) { setLoading(true); + setGraphType(intitalGraphType(isGdsActive)) if (viewPoint !== 'chatInfoView') { graphApi(); } else { @@ -193,7 +194,7 @@ const GraphViewModal: React.FunctionComponent = ({ setLoading(false); } } - }, [open]); + }, [open, isGdsActive]); // The search and update nodes const handleSearch = useCallback( @@ -219,8 +220,8 @@ const GraphViewModal: React.FunctionComponent = ({ match && viewPoint === graphLabels.showGraphView ? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -252,7 +253,8 @@ const GraphViewModal: React.FunctionComponent = ({ graphType, finalNodes ?? [], finalRels ?? 
[], - schemeVal + schemeVal, + isGdsActive ); setNodes(filteredNodes); setRelationships(filteredRelations); @@ -305,7 +307,7 @@ const GraphViewModal: React.FunctionComponent = ({ setStatusMessage(''); setGraphViewOpen(false); setScheme({}); - setGraphType(intitalGraphType); + setGraphType(intitalGraphType(isGdsActive)); setNodes([]); setRelationships([]); setAllNodes([]); @@ -351,8 +353,8 @@ const GraphViewModal: React.FunctionComponent = ({ isActive && viewPoint === graphLabels.showGraphView ? 100 : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -394,6 +396,9 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(updatedNodes); }; + console.log('rels', relationships); + + console.log('nodes', nodes); return ( <> = ({ {headerTitle} {checkBoxView && ( - + )} @@ -427,7 +437,7 @@ const GraphViewModal: React.FunctionComponent = ({
) : nodes.length === 0 && relationships.length === 0 && graphType.length !== 0 ? (
- +
) : graphType.length === 0 ? (
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 4e5a03d77..4fb5361f2 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -263,7 +263,7 @@ export interface GraphViewModalProps { selectedRows?: CustomFile[] | undefined; } -export type GraphType = 'Entities' | 'DocumentChunk'; +export type GraphType = 'Entities' | 'DocumentChunk' | 'Communities'; export type PartialLabelNode = Partial & { labels: string; @@ -273,6 +273,7 @@ export interface CheckboxSectionProps { graphType: GraphType[]; loading: boolean; handleChange: (graph: GraphType) => void; + isgds: boolean; } export interface fileName { diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index d3fef33fe..e6b11e798 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -36,26 +36,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai-gpt-3.5', - 'openai-gpt-4o', - 'openai-gpt-4o-mini', - 'gemini-1.0-pro', - 'gemini-1.5-pro', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_v3p1_405b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai-gpt-3.5', + 'openai-gpt-4o', + 'openai-gpt-4o-mini', + 'gemini-1.0-pro', + 'gemini-1.5-pro', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_v3p1_405b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai-gpt-4o') ? 'openai-gpt-4o' : llms?.includes('gemini-1.0-pro') - ? 'gemini-1.0-pro' - : 'diffbot'; + ? 'gemini-1.0-pro' + : 'diffbot'; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? 
process.env.VITE_CHAT_MODES?.split(',') @@ -215,7 +215,12 @@ export const graphView: OptionType[] = [ { label: 'Entity Graph', value: queryMap.Entities }, { label: 'Knowledge Graph', value: queryMap.DocChunkEntities }, ]; -export const intitalGraphType: GraphType[] = ['DocumentChunk', 'Entities']; + +export const intitalGraphType = (isGDSActive: boolean): GraphType[] => { + return isGDSActive + ? ['DocumentChunk', 'Entities', 'Communities'] // GDS is active, include communities + : ['DocumentChunk', 'Entities']; // GDS is inactive, exclude communities +}; export const appLabels = { ownSchema: 'Or Define your own Schema', @@ -237,6 +242,9 @@ export const graphLabels = { selectCheckbox: 'Select atleast one checkbox for graph view', totalRelationships: 'Total Relationships', nodeSize: 30, + docChunk: 'Document & Chunk', + community: 'Communities', + noNodesRels: 'No Nodes and No relationships', }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 319322d24..28b176e49 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -166,18 +166,28 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela return { finalNodes, finalRels, schemeVal }; }; +/** +* Filters nodes, relationships, and scheme based on the selected graph types. +* +* @param graphType - An array of graph types to filter by (e.g., 'DocumentChunk', 'Entities', 'Communities'). +* @param allNodes - An array of all nodes present in the graph. +* @param allRelationships - An array of all relationships in the graph. +* @param scheme - The scheme object containing node and relationship information. +* @returns An object containing filtered nodes, relationships, and scheme based on the selected graph types. 
+*/ export const filterData = ( graphType: GraphType[], allNodes: ExtendedNode[], allRelationships: Relationship[], - scheme: Scheme + scheme: Scheme, + isGdsActive: boolean, ) => { let filteredNodes: ExtendedNode[] = []; let filteredRelations: Relationship[] = []; let filteredScheme: Scheme = {}; - const entityTypes = Object.keys(scheme).filter((type) => type !== 'Document' && type !== 'Chunk'); - if (graphType.includes('DocumentChunk') && !graphType.includes('Entities')) { - // Document + Chunk + const entityTypes = Object.keys(scheme).filter((type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__'); + // Only Document + Chunk + if (graphType.includes('DocumentChunk') && !graphType.includes('Entities') && (!graphType.includes('Communities') || !isGdsActive)) { filteredNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') ); @@ -189,8 +199,8 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk }; - } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk')) { // Only Entity + } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk') && (!graphType.includes('Communities') || !isGdsActive)) { const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); filteredNodes = entityNodes ? 
entityNodes : []; const nodeIds = new Set(filteredNodes.map((node) => node.id)); @@ -201,15 +211,68 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; - } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities')) { + // Only Communities + } else if (graphType.includes('Communities') && !graphType.includes('DocumentChunk') && !graphType.includes('Entities') && isGdsActive) { + filteredNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + ['IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? '') && nodeIds.has(rel.from) && nodeIds.has(rel.to) + ); + filteredScheme = { __Community__: scheme.__Community__ }; // Document + Chunk + Entity + } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities') && (!graphType.includes('Communities') || !isGdsActive)) { + filteredNodes = allNodes.filter( + (node) => + (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') || !node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__') + ); + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + !['IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? 
'') && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk, ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) }; + // Entities + Communities + } else if (graphType.includes('Entities') && graphType.includes('Communities') && !graphType.includes('DocumentChunk') && isGdsActive) { + const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); + const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + filteredNodes = [...entityNodes, ...communityNodes]; + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + !['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption ?? '') && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { + ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])), + __Community__: scheme.__Community__ + }; + // Document + Chunk + Communities + } else if (graphType.includes('DocumentChunk') && graphType.includes('Communities') && !graphType.includes('Entities') && isGdsActive) { + const documentChunkNodes = allNodes.filter( + (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') + ); + const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + filteredNodes = [...documentChunkNodes, ...communityNodes]; + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + ['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK', 'IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? 
'') && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk, __Community__: scheme.__Community__ }; + // Document + Chunk + Entity + Communities (All types) + } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities') && graphType.includes('Communities') && isGdsActive) { filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; } return { filteredNodes, filteredRelations, filteredScheme }; }; - export const getDateTime = () => { const date = new Date(); const formattedDateTime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; From 2d4c35a2214d3a8194e557e99a7aeffa9aa032fc Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:09:48 +0000 Subject: [PATCH 070/292] Added time to process file in extract API and it's functions --- backend/score.py | 3 +++ backend/src/main.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 52bd166bb..f20790cc4 100644 --- a/backend/score.py +++ b/backend/score.py @@ -154,6 +154,7 @@ async def extract_knowledge_graph_from_file( Nodes and Relations created in Neo4j databse for the pdf file """ try: + start_time = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -193,6 +194,8 @@ async def extract_knowledge_graph_from_file( result['source_type'] = source_type result['logging_time'] = formatted_time(datetime.now(timezone.utc)) logger.log_struct(result) + extract_api_time = time.time() - start_time + logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) except Exception as e: message=f"Failed To Process File:{file_name} or LLM Unable To Parse Content " diff --git 
a/backend/src/main.py b/backend/src/main.py index 85e0649f7..cc5569c0d 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -281,6 +281,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages status and model as attributes. """ start_time = datetime.now() + processing_source_start_time = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -329,7 +330,10 @@ def processing_source(uri, userName, password, database, model, file_name, pages logging.info('Exit from running loop of processing file') break else: + processing_chunks_start_time = time.time() node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) + processing_chunks_end_time = time.time() - processing_chunks_start_time + logging.info(f"{update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_end_time:.2f} seconds for file name {file_name}") end_time = datetime.now() processed_time = end_time - start_time @@ -369,7 +373,8 @@ def processing_source(uri, userName, password, database, model, file_name, pages delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) else: delete_uploaded_local_file(merged_file_path, file_name) - + processing_source_func = time.time() - processing_source_start_time + logging.info(f"processing source function completed in {processing_source_func:.2f} seconds for file name {file_name}") return { "fileName": file_name, "nodeCount": node_count, From 29205dc6f88c2df8818aa4ac61e4a514ea1d021d Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:45:45 +0000 Subject: [PATCH 071/292] Add Severity level in cloud logging --- backend/score.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git 
a/backend/score.py b/backend/score.py index f20790cc4..ca3707ce1 100644 --- a/backend/score.py +++ b/backend/score.py @@ -193,7 +193,7 @@ async def extract_knowledge_graph_from_file( result['wiki_query'] = wiki_query result['source_type'] = source_type result['logging_time'] = formatted_time(datetime.now(timezone.utc)) - logger.log_struct(result) + logger.log_struct({"severity":"INFO","jsonPayload":result}) extract_api_time = time.time() - start_time logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) @@ -212,7 +212,7 @@ async def extract_knowledge_graph_from_file( logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') delete_uploaded_local_file(merged_file_path,file_name) json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"ERROR","jsonPayload":json_obj}) logging.exception(f'File Failed in extraction: {json_obj}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) finally: @@ -229,7 +229,7 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response("Success",data=result) except Exception as e: job_status = "Failed" @@ -243,24 +243,23 @@ async def post_processing(uri=Form(), 
userName=Form(), password=Form(), database try: graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) - + if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) logging.info(f'Updated KNN Graph') if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") - josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) + json_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Full Text index created') if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) logging.info(f'Entity Embeddings created') + + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success', message='All tasks completed successfully') except Exception as e: @@ -289,7 +288,8 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), result["info"]["response_time"] = round(total_call_time, 2) json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 
'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -306,7 +306,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids= logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -334,7 +334,7 @@ async def graph_query( document_names=document_names ) json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success', data=result) except Exception as e: job_status = "Failed" @@ -367,7 +367,7 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()) graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -384,7 +384,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber graph = create_graph_database_connection(uri, 
userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: @@ -406,7 +406,7 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da result = await asyncio.to_thread(get_labels_and_relationtypes, graph) logging.info(f'Schema result from DB: {result}') json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success', data=result) except Exception as e: message="Unable to get the labels and relationtypes from neo4j database" @@ -475,7 +475,7 @@ async def delete_document_and_entities(uri=Form(), # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 message = f"Deleted {files_list_size} documents with entities from database" json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success',message=message) except Exception as e: job_status = "Failed" From fd12e9ad4d4f222dc0ba0cdc576d5ac02a346629 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:00:55 +0000 Subject: [PATCH 072/292] changed name --- backend/src/QA_integration.py | 4 ++-- backend/src/shared/constants.py | 2 +- frontend/src/utils/Constants.ts | 2 +- 3 files changed, 4 
insertions(+), 4 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index ecea7b50b..e6a25c225 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -190,7 +190,7 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings): "context": formatted_docs, "input": question }) - if chat_mode_settings["mode"] == "local_community_search": + if chat_mode_settings["mode"] == "entity search+vector": result = {'sources': list(), 'chunkdetails': list()} result.update(lc_entities) @@ -325,7 +325,7 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_ neo_db = initialize_neo4j_vector(graph, chat_mode_settings) document_names= list(map(str.strip, json.loads(document_names))) - search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "local_community_search" else CHAT_SEARCH_KWARG_K + search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "entity search+vector" else CHAT_SEARCH_KWARG_K retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index a8661f8b7..acfaeea3d 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -312,7 +312,7 @@ "keyword_index": "keyword", "document_filter": False }, - "local_community_search": { + "entity search+vector": { "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX, diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index b323f9d4c..a3f77c134 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') export const chatModes = 
process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; + : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'entity search+vector', 'global community']; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; From 21dd2837df121177f132f1f21d88d9c13c0f5a10 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:12:01 +0000 Subject: [PATCH 073/292] modified the query --- backend/src/shared/constants.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index acfaeea3d..534061a85 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -273,14 +273,14 @@ LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX = """ -RETURN {{chunks: [c in chunks | c.text], +RETURN {chunks: [c in chunks | c.text], communities: [c in communities | c.summary], entities: [n in nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+ n.id + " " + coalesce(n.description, "")], relationships: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id], - outside: {{ + outside: { nodes: [n in outside[0].nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+n.id + " " + coalesce(n.description, "")], - relationships: [r in outside[0].rels | apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+startNode(r).id+" "+type(r)+" "+apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+endNode(r).id]}} - }} AS text, score, {{entities:metadata}} as metadata + relationships: [r in outside[0].rels | 
apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+startNode(r).id+" "+type(r)+" "+apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+endNode(r).id]} + } AS text, score, {entities:metadata} as metadata """ LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX = """ From ccfabc7fbc5179e09d731907172079b7abf17a8f Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:19:18 +0000 Subject: [PATCH 074/292] updated entities param --- backend/src/QA_integration.py | 6 +++--- backend/src/shared/constants.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index e6a25c225..cd8fb5e5c 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -155,7 +155,7 @@ def format_documents(documents, model): formatted_docs = [] sources = set() - lc_entities = {} + lc_entities = {'entities':list()} for doc in sorted_documents: try: @@ -357,12 +357,12 @@ def process_chat_response(messages,history, question, model, graph, document_nam try: llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) - docs = retrieve_documents(doc_retriever, messages) + docs = retrieve_documents(doc_retriever, messages) if docs: content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings) else: content = "I couldn't find any relevant documents to answer your question." 
- result = {"sources": [], "chunkdetails": []} + result = {"sources": [], "chunkdetails": [],"entities":[]} total_tokens = 0 ai_response = AIMessage(content=content) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 534061a85..3ae5f7693 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -271,7 +271,6 @@ }} as outside """ - LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX = """ RETURN {chunks: [c in chunks | c.text], communities: [c in communities | c.summary], From dde29f2d79d27fda6dd530485d07be27d057e085 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 13 Sep 2024 05:23:23 +0000 Subject: [PATCH 075/292] added document to details --- backend/src/chunkid_entities.py | 4 ++-- backend/src/shared/constants.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index e6ca8a512..82dd5ef44 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -88,12 +88,12 @@ def process_chunkids(driver, chunk_ids): logging.error(f"chunkid_entities module: Error processing chunk ids: {chunk_ids}. 
Error: {e}") raise -def remove_duplicate_nodes(nodes): +def remove_duplicate_nodes(nodes,property="element_id"): unique_nodes = [] seen_element_ids = set() for node in nodes: - element_id = node['element_id'] + element_id = node[property] if element_id not in seen_element_ids: unique_nodes.append(node) seen_element_ids.add(element_id) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 3ae5f7693..993d8d234 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -289,7 +289,10 @@ """ LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ -RETURN [chunk in chunks | chunk {.*, embedding:null}] as chunks, +WITH * +UNWIND chunks as c +MATCH (c)-[:PART_OF]->(d:Document) +RETURN [c {.*, embedding:null, fileName:d.fileName, fileType:d.fileType}] as chunks, [community in communities | community {.*, embedding:null}] as communities, [node in nodes+outside[0].nodes | {element_id:elementId(node), labels:labels(node), properties:{id:node.id,description:node.description}}] as nodes, [r in rels+outside[0].rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, @@ -297,7 +300,6 @@ relationship: {type:type(r), element_id:elementId(r)}}] as entities """ - CHAT_MODE_CONFIG_MAP= { "vector": { "retrieval_query": VECTOR_SEARCH_QUERY, From 82be95b34841015ac7dde508e23f3cf465602b79 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 13 Sep 2024 05:39:18 +0000 Subject: [PATCH 076/292] format fixes --- frontend/src/components/Content.tsx | 4 ++-- frontend/src/components/FileTable.tsx | 4 ++-- frontend/src/components/QuickStarter.tsx | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 052e095b3..8bbe10f92 100644 --- a/frontend/src/components/Content.tsx +++ 
b/frontend/src/components/Content.tsx @@ -513,7 +513,7 @@ const Content: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } - const isStartFromBegining = retryoption === RETRY_OPIONS[0] || retryoption===RETRY_OPIONS[1]; + const isStartFromBegining = retryoption === RETRY_OPIONS[0] || retryoption === RETRY_OPIONS[1]; setFilesData((prev) => { return prev.map((f) => { return f.name === filename @@ -891,4 +891,4 @@ const Content: React.FC = ({ ); }; -export default Content; \ No newline at end of file +export default Content; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 04e2a8841..50e03a5cd 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -35,7 +35,7 @@ import { } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; -import { ArrowPathIconSolid, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; +import { MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; @@ -49,7 +49,7 @@ import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials } = useCredentials(); diff --git a/frontend/src/components/QuickStarter.tsx 
b/frontend/src/components/QuickStarter.tsx index 29832fee1..db2cba7e7 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -42,4 +42,4 @@ const QuickStarter: React.FunctionComponent = () => { ); }; -export default QuickStarter; \ No newline at end of file +export default QuickStarter; From 93a4698e253661bb95d1a0ad472eba077f5d3e84 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 13 Sep 2024 07:06:33 +0000 Subject: [PATCH 077/292] added global embedding --- backend/score.py | 2 +- backend/src/make_relationships.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/score.py b/backend/score.py index 54cbb3ef3..99e8bd812 100644 --- a/backend/score.py +++ b/backend/score.py @@ -446,7 +446,7 @@ async def generate(): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - print(f'Result of document status in SSE : {result}') + # print(f'Result of document status in SSE : {result}') if len(result) > 0: status = json.dumps({'fileName':file_name, 'status':result[0]['Status'], diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 4783c5abe..1ea2729e5 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -9,6 +9,9 @@ logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO') +EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') +EMBEDDING_FUNCTION , EMBEDDING_DIMENSION = load_embedding_model(EMBEDDING_MODEL) + def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_documents_chunk_chunk_Id : list): batch_data = [] logging.info("Create HAS_ENTITY relationship between chunks and entities") @@ -39,9 +42,9 @@ def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_docume def 
update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name): #create embedding isEmbedding = os.getenv('IS_EMBEDDING') - embedding_model = os.getenv('EMBEDDING_MODEL') + # embedding_model = os.getenv('EMBEDDING_MODEL') - embeddings, dimension = load_embedding_model(embedding_model) + embeddings, dimension = EMBEDDING_FUNCTION , EMBEDDING_DIMENSION logging.info(f'embedding model:{embeddings} and dimesion:{dimension}') data_for_query = [] logging.info(f"update embedding and vector index for chunks") From 7e0f1c80c1d73afae8d02a1039b309d4a53dd7e6 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 13 Sep 2024 09:09:13 +0000 Subject: [PATCH 078/292] added database --- backend/score.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/score.py b/backend/score.py index 99e8bd812..a9c3f1f4e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -325,6 +325,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=F @app.post("/graph_query") async def graph_query( uri: str = Form(), + database: str = Form(), userName: str = Form(), password: str = Form(), document_names: str = Form(None), @@ -336,6 +337,7 @@ async def graph_query( uri=uri, username=userName, password=password, + database=database, document_names=document_names ) json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} From d1eda2be34445d3dcaa4a0cdee0fa217abd7157b Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 13 Sep 2024 09:15:47 +0000 Subject: [PATCH 079/292] added database --- backend/src/graph_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index bf51cb76c..77f6e9d0a 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -181,7 
+181,7 @@ def get_completed_documents(driver): return documents -def get_graph_results(uri, username, password,document_names): +def get_graph_results(uri, username, password,database,document_names): """ Retrieves graph data by executing a specified Cypher query using credentials and parameters provided. Processes the results to extract nodes and relationships and packages them in a structured output. @@ -198,7 +198,7 @@ def get_graph_results(uri, username, password,document_names): """ try: logging.info(f"Starting graph query process") - driver = get_graphDB_driver(uri, username, password) + driver = get_graphDB_driver(uri, username, password,database) document_names= list(map(str.strip, json.loads(document_names))) query = GRAPH_QUERY.format(graph_chunk_limit=GRAPH_CHUNK_LIMIT) records, summary , keys = execute_query(driver, query.strip(), document_names) From 3c0a5df61871df5193195ab7ecab703730326e6c Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:16:01 +0000 Subject: [PATCH 080/292] modified is_entity --- backend/score.py | 2 +- backend/src/chunkid_entities.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index f4653ef63..7963146c3 100644 --- a/backend/score.py +++ b/backend/score.py @@ -309,7 +309,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()): try: logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=is_entity) + result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, 
database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower())) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) return create_api_response('Success',data=result) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 82dd5ef44..eee2c0d45 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -146,6 +146,7 @@ def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_e driver = get_graphDB_driver(uri, username, password,database) if not is_entity: if chunk_ids: + logging.info(f"chunkid_entities module: Starting for chunk ids : {chunk_ids}") result = process_chunkids(driver,chunk_ids) else: logging.info(f"chunkid_entities module: No chunk ids are passed") @@ -157,6 +158,7 @@ def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_e return result if chunk_ids: result = process_entityids(driver,chunk_ids) + logging.info(f"chunkid_entities module: Starting for entity ids : {chunk_ids}") else: logging.info(f"chunkid_entities module: No entity ids are passed") result = { From 9e349192edddc0a9184f25906c9ee4461283f9d4 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 16 Sep 2024 05:48:51 +0000 Subject: [PATCH 081/292] modifies chunk entities --- backend/src/chunkid_entities.py | 11 ++++++----- backend/src/shared/constants.py | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index eee2c0d45..89ea069f6 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -117,11 +117,12 @@ def process_entityids(driver, chunk_ids): records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) result = process_records(records) - 
result["nodes"].extend(records[0]["nodes"]) - result["nodes"] = remove_duplicate_nodes(result["nodes"]) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = records[0]["chunks"] - result["community_data"] = records[0]["communities"] + if records: + result["nodes"].extend(records[0]["nodes"]) + result["nodes"] = remove_duplicate_nodes(result["nodes"]) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = records[0]["chunks"] + result["community_data"] = records[0]["communities"] logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") return result except Exception as e: diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 993d8d234..4ec88e8a5 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -248,7 +248,7 @@ collect {{ UNWIND nodes as n MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) - WITH c, c.rank as rank, c.weight AS weight + WITH c, c.community_rank as rank, c.weight AS weight RETURN c ORDER BY rank, weight DESC LIMIT {topCommunities} @@ -287,12 +287,11 @@ MATCH (node) WHERE elementId(node) = id WITH node, 1.0 as score """ - LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ WITH * UNWIND chunks as c MATCH (c)-[:PART_OF]->(d:Document) -RETURN [c {.*, embedding:null, fileName:d.fileName, fileType:d.fileType}] as chunks, +RETURN [c {.*, embedding:null, fileName:d.fileName, fileSource:d.fileSource}] as chunks, [community in communities | community {.*, embedding:null}] as communities, [node in nodes+outside[0].nodes | {element_id:elementId(node), labels:labels(node), properties:{id:node.id,description:node.description}}] as nodes, [r in rels+outside[0].rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, From fa1ca363979bfb8762a5a5150f76680aaa1ef8b2 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 16 Sep 2024 06:30:20 +0000 Subject: [PATCH 082/292] Added secweb to fix security issues --- backend/requirements.txt | 1 + backend/score.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/backend/requirements.txt b/backend/requirements.txt index 46c57aea5..7c67f13fa 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -179,3 +179,4 @@ sentence-transformers==3.0.1 google-cloud-logging==3.10.0 PyMuPDF==1.24.5 pypandoc==1.13 +Secweb==1.11.0 \ No newline at end of file diff --git a/backend/score.py b/backend/score.py index ca3707ce1..8f62b29f1 100644 --- a/backend/score.py +++ b/backend/score.py @@ -26,6 +26,11 @@ from datetime import datetime, timezone import time import gc +from Secweb import SecWeb +from Secweb.StrictTransportSecurity import HSTS +from Secweb.ContentSecurityPolicy import ContentSecurityPolicy +from Secweb.XContentTypeOptions import XContentTypeOptions +from Secweb.XFrameOptions import XFrame logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -42,6 +47,11 @@ def sick(): return False app = FastAPI() +SecWeb(app=app, Option={'referrer': False, 'xframe': False}) +app.add_middleware(HSTS, Option={'max-age': 4, 'preload': True}) +app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +app.add_middleware(XContentTypeOptions) +app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware( CORSMiddleware, From 62f454dedba7d56f3acb36080b1d90406449a65a Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:03:20 +0000 Subject: [PATCH 083/292] removed QA integration --- backend/src/QA_integration_new.py | 424 ------------------------------ 1 file changed, 424 deletions(-) delete mode 100644 
backend/src/QA_integration_new.py diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py deleted file mode 100644 index eeac78c1e..000000000 --- a/backend/src/QA_integration_new.py +++ /dev/null @@ -1,424 +0,0 @@ -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.graphs import Neo4jGraph -import os -from dotenv import load_dotenv -import logging -from langchain_community.chat_message_histories import Neo4jChatMessageHistory -from src.llm import get_llm -from src.shared.common_fn import load_embedding_model -import re -from typing import Any -from datetime import datetime -import time -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain_core.output_parsers import StrOutputParser -from langchain_core.runnables import RunnableBranch -from langchain.retrievers import ContextualCompressionRetriever -from langchain_community.document_transformers import EmbeddingsRedundantFilter -from langchain.retrievers.document_compressors import EmbeddingsFilter -from langchain.retrievers.document_compressors import DocumentCompressorPipeline -from langchain_text_splitters import TokenTextSplitter -from langchain_core.messages import HumanMessage,AIMessage -from src.shared.constants import * -from src.llm import get_llm -from langchain.chains import GraphCypherQAChain -import json - -## Chat models -from langchain_openai import ChatOpenAI, AzureChatOpenAI -from langchain_google_vertexai import ChatVertexAI -from langchain_groq import ChatGroq -from langchain_anthropic import ChatAnthropic -from langchain_fireworks import ChatFireworks -from langchain_aws import ChatBedrock -from langchain_community.chat_models import ChatOllama - -load_dotenv() - -EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') -EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) - - -def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", 
search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): - try: - if mode == "fulltext" or mode == "graph + vector + fulltext": - neo_db = Neo4jVector.from_existing_graph( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph, - search_type="hybrid", - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"], - keyword_index_name=keyword_index - ) - # neo_db = Neo4jVector.from_existing_index( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph, - # search_type="hybrid", - # keyword_index_name=keyword_index - # ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'") - else: - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph - ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") - document_names= list(map(str.strip, json.loads(document_names))) - if document_names: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': search_k, "score_threshold": score_threshold,'filter':{'fileName': {'$in': document_names}}}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}") - else: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': search_k, "score_threshold": score_threshold}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold}") - return retriever - except Exception as e: - logging.error(f"Error retrieving Neo4jVector index '{index_name}' or creating retriever: {e}") - raise Exception("An error occurred while retrieving the Neo4jVector index '{index_name}' 
or creating the retriever. Please drop and create a new vector index: {e}") from e - -def create_document_retriever_chain(llm, retriever): - try: - logging.info("Starting to create document retriever chain") - - query_transform_prompt = ChatPromptTemplate.from_messages( - [ - ("system", QUESTION_TRANSFORM_TEMPLATE), - MessagesPlaceholder(variable_name="messages") - ] - ) - - output_parser = StrOutputParser() - - splitter = TokenTextSplitter(chunk_size=CHAT_DOC_SPLIT_SIZE, chunk_overlap=0) - embeddings_filter = EmbeddingsFilter( - embeddings=EMBEDDING_FUNCTION, - similarity_threshold=CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD - ) - - pipeline_compressor = DocumentCompressorPipeline( - transformers=[splitter, embeddings_filter] - ) - - compression_retriever = ContextualCompressionRetriever( - base_compressor=pipeline_compressor, base_retriever=retriever - ) - - query_transforming_retriever_chain = RunnableBranch( - ( - lambda x: len(x.get("messages", [])) == 1, - (lambda x: x["messages"][-1].content) | compression_retriever, - ), - query_transform_prompt | llm | output_parser | compression_retriever, - ).with_config(run_name="chat_retriever_chain") - - logging.info("Successfully created document retriever chain") - return query_transforming_retriever_chain - - except Exception as e: - logging.error(f"Error creating document retriever chain: {e}", exc_info=True) - raise - - -def create_neo4j_chat_message_history(graph, session_id): - """ - Creates and returns a Neo4jChatMessageHistory instance. 
- - """ - try: - - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - return history - - except Exception as e: - logging.error(f"Error creating Neo4jChatMessageHistory: {e}") - raise - -def format_documents(documents,model): - prompt_token_cutoff = 4 - for models,value in CHAT_TOKEN_CUT_OFF.items(): - if model in models: - prompt_token_cutoff = value - - sorted_documents = sorted(documents, key=lambda doc: doc.state["query_similarity_score"], reverse=True) - sorted_documents = sorted_documents[:prompt_token_cutoff] - - formatted_docs = [] - sources = set() - - for doc in sorted_documents: - source = doc.metadata['source'] - sources.add(source) - - formatted_doc = ( - "Document start\n" - f"This Document belongs to the source {source}\n" - f"Content: {doc.page_content}\n" - "Document end\n" - ) - formatted_docs.append(formatted_doc) - - return "\n\n".join(formatted_docs), sources - -def get_rag_chain(llm,system_template=CHAT_SYSTEM_TEMPLATE): - question_answering_prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_template), - MessagesPlaceholder(variable_name="messages"), - ( - "human", - "User question: {input}" - ), - ] - ) - question_answering_chain = question_answering_prompt | llm - - return question_answering_chain - -def get_sources_and_chunks(sources_used, docs): - chunkdetails_list = [] - sources_used_set = set(sources_used) - - for doc in docs: - source = doc.metadata["source"] - chunkdetails = doc.metadata["chunkdetails"] - if source in sources_used_set: - chunkdetails = [{**chunkdetail, "score": round(chunkdetail["score"], 4)} for chunkdetail in chunkdetails] - chunkdetails_list.extend(chunkdetails) - - result = { - 'sources': sources_used, - 'chunkdetails': chunkdetails_list - } - return result - -def summarize_messages(llm,history,stored_messages): - if len(stored_messages) == 0: - return False - summarization_prompt = ChatPromptTemplate.from_messages( - [ - 
MessagesPlaceholder(variable_name="chat_history"), - ( - "human", - "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information." - ), - ] - ) - - summarization_chain = summarization_prompt | llm - - summary_message = summarization_chain.invoke({"chat_history": stored_messages}) - - history.clear() - history.add_user_message("Our current convertaion summary till now") - history.add_message(summary_message) - return True - - -def get_total_tokens(ai_response,llm): - - if isinstance(llm,(ChatOpenAI,AzureChatOpenAI,ChatFireworks,ChatGroq)): - total_tokens = ai_response.response_metadata['token_usage']['total_tokens'] - elif isinstance(llm,(ChatVertexAI)): - total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count'] - elif isinstance(llm,(ChatBedrock)): - total_tokens = ai_response.response_metadata['usage']['total_tokens'] - elif isinstance(llm,(ChatAnthropic)): - input_tokens = int(ai_response.response_metadata['usage']['input_tokens']) - output_tokens = int(ai_response.response_metadata['usage']['output_tokens']) - total_tokens = input_tokens + output_tokens - elif isinstance(llm,(ChatOllama)): - total_tokens = ai_response.response_metadata["prompt_eval_count"] - else: - total_tokens = 0 - return total_tokens - - -def clear_chat_history(graph,session_id): - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - history.clear() - return { - "session_id": session_id, - "message": "The chat History is cleared", - "user": "chatbot" - } - -def setup_chat(model, graph, document_names,retrieval_query,mode): - start_time = time.time() - if model in ["diffbot"]: - model = "openai-gpt-4o" - llm,model_name = get_llm(model) - logging.info(f"Model called in chat {model} and model version is {model_name}") - retriever = 
get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query,document_names=document_names,mode=mode) - doc_retriever = create_document_retriever_chain(llm, retriever) - chat_setup_time = time.time() - start_time - logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") - - return llm, doc_retriever, model_name - -def retrieve_documents(doc_retriever, messages): - start_time = time.time() - docs = doc_retriever.invoke({"messages": messages}) - doc_retrieval_time = time.time() - start_time - logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") - return docs - -def process_documents(docs, question, messages, llm,model): - start_time = time.time() - formatted_docs, sources = format_documents(docs,model) - rag_chain = get_rag_chain(llm=llm) - ai_response = rag_chain.invoke({ - "messages": messages[:-1], - "context": formatted_docs, - "input": question - }) - result = get_sources_and_chunks(sources, docs) - content = ai_response.content - total_tokens = get_total_tokens(ai_response,llm) - - - predict_time = time.time() - start_time - logging.info(f"Final Response predicted in {predict_time:.2f} seconds") - - return content, result, total_tokens - -def summarize_and_log(history, messages, llm): - start_time = time.time() - summarize_messages(llm, history, messages) - history_summarized_time = time.time() - start_time - logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") - - -def create_graph_chain(model, graph): - try: - logging.info(f"Graph QA Chain using LLM model: {model}") - - cypher_llm,model_name = get_llm(model) - qa_llm,model_name = get_llm(model) - graph_chain = GraphCypherQAChain.from_llm( - cypher_llm=cypher_llm, - qa_llm=qa_llm, - validate_cypher= True, - graph=graph, - # verbose=True, - return_intermediate_steps = True, - top_k=3 - ) - - logging.info("GraphCypherQAChain instance created successfully.") - return graph_chain,qa_llm,model_name - - except Exception as e: - logging.error(f"An 
error occurred while creating the GraphCypherQAChain instance. : {e}") - - -def get_graph_response(graph_chain, question): - try: - cypher_res = graph_chain.invoke({"query": question}) - - response = cypher_res.get("result") - cypher_query = "" - context = [] - - for step in cypher_res.get("intermediate_steps", []): - if "query" in step: - cypher_string = step["query"] - cypher_query = cypher_string.replace("cypher\n", "").replace("\n", " ").strip() - elif "context" in step: - context = step["context"] - return { - "response": response, - "cypher_query": cypher_query, - "context": context - } - - except Exception as e: - logging.error("An error occurred while getting the graph response : {e}") - -def QA_RAG(graph, model, question, document_names,session_id, mode): - try: - logging.info(f"Chat Mode : {mode}") - history = create_neo4j_chat_message_history(graph, session_id) - messages = history.messages - user_question = HumanMessage(content=question) - messages.append(user_question) - - if mode == "graph": - graph_chain, qa_llm,model_version = create_graph_chain(model,graph) - graph_response = get_graph_response(graph_chain,question) - ai_response = AIMessage(content=graph_response["response"]) if graph_response["response"] else AIMessage(content="Something went wrong") - messages.append(ai_response) - summarize_and_log(history, messages, qa_llm) - - result = { - "session_id": session_id, - "message": graph_response["response"], - "info": { - "model": model_version, - 'cypher_query':graph_response["cypher_query"], - "context" : graph_response["context"], - "mode" : mode, - "response_time": 0 - }, - "user": "chatbot" - } - return result - elif mode == "vector" or mode == "fulltext": - retrieval_query = VECTOR_SEARCH_QUERY - else: - retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) - - llm, doc_retriever, model_version = setup_chat(model, graph, document_names,retrieval_query,mode) - - docs = 
retrieve_documents(doc_retriever, messages) - - if docs: - content, result, total_tokens = process_documents(docs, question, messages, llm,model) - else: - content = "I couldn't find any relevant documents to answer your question." - result = {"sources": [], "chunkdetails": []} - total_tokens = 0 - - ai_response = AIMessage(content=content) - messages.append(ai_response) - summarize_and_log(history, messages, llm) - - return { - "session_id": session_id, - "message": content, - "info": { - "sources": result["sources"], - "model": model_version, - "chunkdetails": result["chunkdetails"], - "total_tokens": total_tokens, - "response_time": 0, - "mode": mode - }, - "user": "chatbot" - } - - except Exception as e: - logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") - error_name = type(e).__name__ - return { - "session_id": session_id, - "message": "Something went wrong", - "info": { - "sources": [], - "chunkids": [], - "error": f"{error_name} :- {str(e)}", - "mode": mode - }, - "user": "chatbot" - } From 4c7821e17d238113fd124e0a8c83b13a9a209fd5 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:20:06 +0000 Subject: [PATCH 084/292] created neo4j from existing index --- backend/src/QA_integration.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index cd8fb5e5c..d28459a9b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -288,16 +288,26 @@ def initialize_neo4j_vector(graph, chat_mode_settings): ) logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") else: - neo_db = Neo4jVector.from_existing_index( + # neo_db = Neo4jVector.from_existing_index( + # embedding=EMBEDDING_FUNCTION, + # index_name=index_name, + # retrieval_query=retrieval_query, + # graph=graph + # ) + neo_db 
= Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, retrieval_query=retrieval_query, - graph=graph + graph=graph, + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"] ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") except Exception as e: - logging.error(f"Error retrieving Neo4jVector index : {e}") + index_name = chat_mode_settings.get("index_name") + logging.error(f"Error retrieving Neo4jVector index {index_name} : {e}") raise return neo_db @@ -329,8 +339,9 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_ retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: - logging.error(f"Error retrieving Neo4jVector index or creating retriever: {e}") - raise Exception("An error occurred while retrieving the Neo4jVector index or creating the retriever. Please drop and create a new vector index: {e}") from e + index_name = chat_mode_settings.get("index_name") + logging.error(f"Error retrieving Neo4jVector index {index_name} or creating retriever: {e}") + raise Exception(f"An error occurred while retrieving the Neo4jVector index or creating the retriever. 
Please drop and create a new vector index '{index_name}': {e}") from e def setup_chat(model, graph, document_names, chat_mode_settings): From 4206ec00ad5f111a6a26efa3e1aaea0e5fc216db Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:40:20 +0000 Subject: [PATCH 085/292] modified script --- backend/test_integrationqa.py | 356 +++++++++++++++++----------------- 1 file changed, 178 insertions(+), 178 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index b5a50de60..7b2b110d4 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -5,7 +5,6 @@ import pandas as pd from datetime import datetime as dt from dotenv import load_dotenv - from score import * from src.main import * from src.QA_integration_new import QA_RAG @@ -26,163 +25,163 @@ graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) def create_source_node_local(graph, model, file_name): - """Creates a source node for a local file.""" - source_node = sourceNode() - source_node.file_name = file_name - source_node.file_type = 'pdf' - source_node.file_size = '1087' - source_node.file_source = 'local file' - source_node.model = model - source_node.created_at = dt.now() - graphDB_data_Access = graphDBdataAccess(graph) - graphDB_data_Access.create_source_node(source_node) - return source_node + """Creates a source node for a local file.""" + source_node = sourceNode() + source_node.file_name = file_name + source_node.file_type = 'pdf' + source_node.file_size = '1087' + source_node.file_source = 'local file' + source_node.model = model + source_node.created_at = dt.now() + graphDB_data_Access = graphDBdataAccess(graph) + graphDB_data_Access.create_source_node(source_node) + return source_node def test_graph_from_file_local(model_name): - """Test graph creation from a local file.""" - file_name = 'About Amazon.pdf' - 
shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', - os.path.join(MERGED_DIR, file_name)) - - create_source_node_local(graph, model_name, file_name) - merged_file_path = os.path.join(MERGED_DIR, file_name) - - local_file_result = extract_graph_from_file_local_file( - URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '' - ) - logging.info("Local file processing complete") - print(local_file_result) - - try: - assert local_file_result['status'] == 'Completed' - assert local_file_result['nodeCount'] > 0 - assert local_file_result['relationshipCount'] > 0 - print("Success") - except AssertionError as e: - print("Fail: ", e) - - return local_file_result + """Test graph creation from a local file.""" + file_name = 'About Amazon.pdf' + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', + os.path.join(MERGED_DIR, file_name)) + + create_source_node_local(graph, model_name, file_name) + merged_file_path = os.path.join(MERGED_DIR, file_name) + + local_file_result = extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None + ) + logging.info("Local file processing complete") + print(local_file_result) + + try: + assert local_file_result['status'] == 'Completed' + assert local_file_result['nodeCount'] > 0 + assert local_file_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + + return local_file_result def test_graph_from_wikipedia(model_name): - """Test graph creation from a Wikipedia page.""" - wiki_query = 'https://en.wikipedia.org/wiki/Google_DeepMind' - source_type = 'Wikipedia' - file_name = "Google_DeepMind" - create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - - wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '') - logging.info("Wikipedia test done") - 
print(wiki_result) - - try: - assert wiki_result['status'] == 'Completed' - assert wiki_result['nodeCount'] > 0 - assert wiki_result['relationshipCount'] > 0 - print("Success") - except AssertionError as e: - print("Fail: ", e) - - return wiki_result + # try: + """Test graph creation from a Wikipedia page.""" + wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + source_type = 'Wikipedia' + file_name = "Ram_Mandir" + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) + + wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en',file_name, '', '',None) + logging.info("Wikipedia test done") + print(wiki_result) + try: + assert wiki_result['status'] == 'Completed' + assert wiki_result['nodeCount'] > 0 + assert wiki_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + + return wiki_result + # except Exception as ex: + # print(ex) def test_graph_website(model_name): - """Test graph creation from a Website page.""" - #graph, model, source_url, source_type - source_url = 'https://www.amazon.com/' - source_type = 'web-url' - create_source_node_graph_web_url(graph, model_name, source_url, source_type) - - weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') - logging.info("WebUrl test done") - print(weburl_result) - - try: - assert weburl_result['status'] == 'Completed' - assert weburl_result['nodeCount'] > 0 - assert weburl_result['relationshipCount'] > 0 - print("Success") - except AssertionError as e: - print("Fail: ", e) - return weburl_result - + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + file_name = [] + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, 
model_name, source_url,file_name, '', '',None) + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result def test_graph_from_youtube_video(model_name): - """Test graph creation from a YouTube video.""" - source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' - source_type = 'youtube' - - create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) - youtube_result = extract_graph_from_file_youtube( - URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '' - ) - logging.info("YouTube Video test done") - print(youtube_result) - - try: - assert youtube_result['status'] == 'Completed' - assert youtube_result['nodeCount'] > 1 - assert youtube_result['relationshipCount'] > 1 - print("Success") - except AssertionError as e: - print("Failed: ", e) - - return youtube_result + """Test graph creation from a YouTube video.""" + source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' + source_type = 'youtube' + create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) + youtube_result = extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '' + ) + logging.info("YouTube Video test done") + print(youtube_result) + + try: + assert youtube_result['status'] == 'Completed' + assert youtube_result['nodeCount'] > 1 + assert youtube_result['relationshipCount'] > 1 + print("Success") + except AssertionError as e: + print("Failed: ", e) + + return youtube_result def test_chatbot_qna(model_name, mode='vector'): - """Test chatbot QnA functionality for different modes.""" - QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) - print(QA_n_RAG) - print(len(QA_n_RAG['message'])) - - try: - assert len(QA_n_RAG['message']) > 20 - return 
QA_n_RAG - print("Success") - except AssertionError as e: - print("Failed ", e) - return QA_n_RAG - + """Test chatbot QnA functionality for different modes.""" + QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode) + print(QA_n_RAG) + print(len(QA_n_RAG['message'])) + + + try: + assert len(QA_n_RAG['message']) > 20 + return QA_n_RAG + print("Success") + except AssertionError as e: + print("Failed ", e) + return QA_n_RAG + #Get Test disconnected_nodes list def disconected_nodes(): - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() - print(nodes_list[0]["e"]["elementId"]) - status = "False" - - if total_nodes['total']>0: - status = "True" - else: - status = "False" - - return nodes_list[0]["e"]["elementId"], status - + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + print(nodes_list[0]["e"]["elementId"]) + status = "False" + if total_nodes['total']>0: + status = "True" + else: + status = "False" + return nodes_list[0]["e"]["elementId"], status + #Test Delete delete_disconnected_nodes list def delete_disconected_nodes(lst_element_id): - print(f'disconnect elementid list {lst_element_id}') - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) - print(f'delete disconnect api result {result}') - if not result: - return "True" - else: - return "False" + print(f'disconnect elementid list {lst_element_id}') + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = 
graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + print(f'delete disconnect api result {result}') + if not result: + return "True" + else: + return "False" #Test Get Duplicate_nodes def get_duplicate_nodes(): - #graph = create_graph_database_connection(uri, userName, password, database) - graphDb_data_Access = graphDBdataAccess(graph) - nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() - if total_nodes['total']>0: - return "True" - else: - return "False" - + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() + if total_nodes['total']>0: + return "True" + else: + return "False" + #Test populate_graph_schema def test_populate_graph_schema_from_text(model): - result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True) - print(result_schema) - return result_schema + result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble.', model, True) + print(result_schema) + return result_schema # def compare_graph_results(results): # """ @@ -200,44 +199,45 @@ def test_populate_graph_schema_from_text(model): # print(f"Result {i} differs from result {i+1}") def run_tests(): - final_list = [] - error_list = [] - models = ['openai-gpt-4o','gemini-1.5-pro'] - - for model_name in models: - try: - final_list.append(test_graph_from_file_local(model_name)) - final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_populate_graph_schema_from_text(model_name)) - final_list.append(test_graph_website(model_name)) - # final_list.append(test_graph_from_youtube_video(model_name)) - # final_list.append(test_chatbot_qna(model_name)) - # final_list.append(test_chatbot_qna(model_name, mode='vector')) - # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) - except Exception as e: - error_list.append((model_name, str(e))) - # #Compare and log diffrences in graph results - # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - # test_populate_graph_schema_from_text('openai-gpt-4o') - dis_elementid, dis_status = disconected_nodes() - lst_element_id = [dis_elementid] - delt = delete_disconected_nodes(lst_element_id) - dup = get_duplicate_nodes() - # schma = test_populate_graph_schema_from_text(model) - # Save final results to CSV - df = pd.DataFrame(final_list) - print(df) - df['execution_date'] = dt.today().strftime('%Y-%m-%d') - df['disconnected_nodes']=dis_status - df['get_duplicate_nodes']=dup - df['delete_disconected_nodes']=delt - # df['test_populate_graph_schema_from_text'] = schma - df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - - # Save error details to CSV - df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) - df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') - 
df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + final_list = [] + error_list = [] + models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.0-pro','gemini-1.5-pro','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_v3p1_405b','bedrock_claude_3_5_sonnet'] + + for model_name in models: + try: + final_list.append(test_graph_from_file_local(model_name)) + final_list.append(test_graph_from_wikipedia(model_name)) + final_list.append(test_populate_graph_schema_from_text(model_name)) + final_list.append(test_graph_website(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_chatbot_qna(model_name)) + final_list.append(test_chatbot_qna(model_name, mode='vector')) + final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + except Exception as e: + error_list.append((model_name, str(e))) + # #Compare and log diffrences in graph results + # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + # test_populate_graph_schema_from_text('openai-gpt-4o') + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delt = delete_disconected_nodes(lst_element_id) + dup = get_duplicate_nodes() + print(final_list) + # schma = test_populate_graph_schema_from_text(model) + # Save final results to CSV + df = pd.DataFrame(final_list) + print(df) + df['execution_date'] = dt.today().strftime('%Y-%m-%d') + df['disconnected_nodes']=dis_status + df['get_duplicate_nodes']=dup + df['delete_disconected_nodes']=delt + # df['test_populate_graph_schema_from_text'] = schma + df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + + # Save error details to CSV + df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + 
df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() \ No newline at end of file From 801745e1ac773781cd8d7cd6fe9214c49e014df1 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:49:43 +0530 Subject: [PATCH 086/292] Integrate local search to chat details (#746) * added the commuties tab * removed unused variables * removed scipy libarary * added the mode check * Integrated the communities tab * added the cjheck * enabled the top entities mode * tabs order rearange * added the loader to sources tab for entity search+vector * fixed the chat mode per prop --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/requirements.txt | 3 +- .../src/components/ChatBot/ChatInfoModal.tsx | 113 +++++- frontend/src/components/ChatBot/Chatbot.tsx | 10 +- .../src/components/ChatBot/Info/InfoModal.tsx | 360 ------------------ frontend/src/components/Content.tsx | 33 +- .../Local/DropZoneForSmallLayouts.tsx | 3 +- .../components/Graph/CheckboxSelection.tsx | 14 +- .../src/components/Graph/GraphViewModal.tsx | 31 +- .../Deduplication/index.tsx | 4 +- .../Popups/Settings/SchemaFromText.tsx | 1 - frontend/src/context/UserCredentials.tsx | 2 +- frontend/src/services/ChunkEntitiesInfo.ts | 9 +- frontend/src/types.ts | 12 +- frontend/src/utils/Constants.ts | 42 +- frontend/src/utils/Utils.ts | 122 ++++-- 15 files changed, 295 insertions(+), 464 deletions(-) delete mode 100644 frontend/src/components/ChatBot/Info/InfoModal.tsx diff --git a/backend/requirements.txt b/backend/requirements.txt index b83a76b82..8ae9195bb 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -140,7 +140,6 @@ requests==2.32.3 rsa==4.9 s3transfer==0.10.1 safetensors==0.4.1 -scipy==1.10.1 shapely==2.0.3 six==1.16.0 sniffio==1.3.1 @@ -179,4 +178,4 @@ 
sentence-transformers==3.0.1 google-cloud-logging==3.10.0 PyMuPDF==1.24.5 pypandoc==1.13 -graphdatascience==1.10 +graphdatascience==1.10 \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 3c6642665..2d4381f59 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -11,7 +11,12 @@ import { Banner, useMediaQuery, } from '@neo4j-ndl/react'; -import { DocumentDuplicateIconOutline, DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; +import { + DocumentDuplicateIconOutline, + DocumentTextIconOutline, + ClipboardDocumentCheckIconOutline, + GlobeAltIconOutline, +} from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; import wikipedialogo from '../../assets/images/wikipedia.svg'; @@ -20,6 +25,7 @@ import gcslogo from '../../assets/images/gcs.webp'; import s3logo from '../../assets/images/s3logo.png'; import { Chunk, + Community, Entity, ExtendedNode, ExtendedRelationship, @@ -34,10 +40,8 @@ import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ReactMarkdown from 'react-markdown'; -import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; -import { parseEntity, youtubeLinkValidation } from '../../utils/Utils'; +import { getLogo, parseEntity, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; -import { ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import { tokens } from '@neo4j-ndl/base'; const ChatInfoModal: React.FC = ({ @@ -55,6 +59,7 @@ const ChatInfoModal: React.FC = ({ const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const [activeTab, 
setActiveTab] = useState(error.length ? 10 : mode === 'graph' ? 4 : 3); const [infoEntities, setInfoEntities] = useState([]); + const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); @@ -86,14 +91,25 @@ const ChatInfoModal: React.FC = ({ ], [copiedText, cypher_query] ); + useEffect(() => { if (mode != 'graph' || error?.trim() !== '') { - setLoading(true); - chunkEntitiesAPI(userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(',')) - .then((response) => { + (async () => { + setLoading(true); + try { + const response = await chunkEntitiesAPI( + userCredentials as UserCredentials, + chunk_ids.map((c) => c.id).join(','), + userCredentials?.database, + mode === 'entity search+vector' + ); + if (response.data.status === 'Failure') { + throw new Error(response.data.error); + } setInfoEntities(response.data.data.nodes); setNodes(response.data.data.nodes); setRelationships(response.data.data.relationships); + setCommunities(response.data.data.community_data); const chunks = response.data.data.chunk_data.map((chunk: any) => { const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); return { @@ -104,17 +120,17 @@ const ChatInfoModal: React.FC = ({ const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); setChunks(sortedchunks); setLoading(false); - }) - .catch((error) => { + } catch (error) { console.error('Error fetching entities:', error); setLoading(false); - }); + } + })(); } - () => { setcopiedText(false); }; }, [chunk_ids, mode, error]); + const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { return infoEntities.reduce((acc, entity) => { const { label, text } = parseEntity(entity); @@ -126,9 +142,11 @@ const ChatInfoModal: React.FC = ({ return acc; }, {} as Record; color: string }>); }, [infoEntities]); + const onChangeTabs = (tabId: number) => { setActiveTab(tabId); 
}; + const labelCounts = useMemo(() => { const counts: { [label: string]: number } = {}; for (let index = 0; index < infoEntities.length; index++) { @@ -139,6 +157,7 @@ const ChatInfoModal: React.FC = ({ } return counts; }, [infoEntities]); + const sortedLabels = useMemo(() => { return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); }, [labelCounts]); @@ -153,6 +172,7 @@ const ChatInfoModal: React.FC = ({ return ''; } }; + return ( @@ -176,7 +196,11 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode != 'graph' ? Sources used : <>} - {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? ( + {mode != 'graph' ? Chunks : <>} + {mode === 'graph+vector' || + mode === 'graph' || + mode === 'graph+vector+fulltext' || + mode === 'entity search+vector' ? ( Top Entities used ) : ( <> @@ -186,12 +210,46 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode != 'graph' ? Chunks : <>} + {mode === 'entity search+vector' ? Communities : <>} )} - {sources.length ? ( + {loading ? ( + + + + ) : mode === 'entity search+vector' && chunks.length ? ( +
    + {chunks + .map((c) => ({ fileName: c.fileName, fileSource: c.fileType })) + .map((s, index) => { + return ( +
  • +
    + {s.fileSource === 'local file' ? ( + + ) : ( + S3 Logo + )} + + {s.fileName} + +
    +
  • + ); + })} +
+ ) : sources.length ? (
    {sources.map((link, index) => { return ( @@ -436,6 +494,33 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> + {mode === 'entity search+vector' ? ( + + {loading ? ( + + + + ) : ( +
    +
      + {communities.map((community, index) => ( +
    • +
      + + ID : + {community.id} + + {community.summary} +
      +
    • + ))} +
    +
    + )} +
    + ) : ( + <> + )} {activeTab == 4 && nodes.length && relationships.length ? ( diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 7470dbd4d..5e931ad35 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -89,6 +89,7 @@ const Chatbot: FC = (props) => { cypher_query?: string; graphonly_entities?: []; error?: string; + entitiysearchonly_entities?: chunk[]; }, index = 0 ) => { @@ -119,6 +120,7 @@ const Chatbot: FC = (props) => { cypher_query: response?.cypher_query, graphonly_entities: response?.graphonly_entities, error: response.error, + entitiysearchonly_entities: response.entitiysearchonly_entities, }, ]); } else { @@ -141,6 +143,7 @@ const Chatbot: FC = (props) => { lastmsg.cypher_query = response.cypher_query; lastmsg.graphonly_entities = response.graphonly_entities; lastmsg.error = response.error; + lastmsg.entities = response.entitiysearchonly_entities; return msgs.map((msg, index) => { if (index === msgs.length - 1) { return lastmsg; @@ -174,6 +177,7 @@ const Chatbot: FC = (props) => { let cypher_query; let graphonly_entities; let error; + let entitiysearchonly_entities; const datetime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime }; setListMessages([...listMessages, userMessage]); @@ -198,6 +202,7 @@ const Chatbot: FC = (props) => { chatingMode = chatresponse?.data?.data?.info?.mode; cypher_query = chatresponse?.data?.data?.info?.cypher_query ?? ''; graphonly_entities = chatresponse?.data.data.info.context ?? []; + entitiysearchonly_entities = chatresponse?.data.data.info.entities; error = chatresponse.data.data.info.error ?? 
''; const finalbotReply = { reply: chatbotReply, @@ -212,6 +217,7 @@ const Chatbot: FC = (props) => { cypher_query, graphonly_entities, error, + entitiysearchonly_entities, }; simulateTypingEffect(finalbotReply); } catch (error) { @@ -349,7 +355,9 @@ const Chatbot: FC = (props) => { setModelModal(chat.model ?? ''); setSourcesModal(chat.sources ?? []); setResponseTime(chat.response_time ?? 0); - setChunkModal(chat.chunk_ids ?? []); + setChunkModal( + chat.mode === 'entity search+vector' ? chat.entities ?? [] : chat.chunk_ids ?? [] + ); setTokensUsed(chat.total_tokens ?? 0); setcypherQuery(chat.cypher_query ?? ''); setShowInfoModal(true); diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx deleted file mode 100644 index 0d5b82a64..000000000 --- a/frontend/src/components/ChatBot/Info/InfoModal.tsx +++ /dev/null @@ -1,360 +0,0 @@ -import { Box, Typography, TextLink, Flex, Tabs, LoadingSpinner } from '@neo4j-ndl/react'; -import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; -import '../../../styling/info.css'; -import Neo4jRetrievalLogo from '../../../assets/images/Neo4jRetrievalLogo.png'; -import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; -import youtubelogo from '../../../assets/images/youtube.png'; -import gcslogo from '../../../assets/images/gcs.webp'; -import s3logo from '../../../assets/images/s3logo.png'; -import { - Chunk, - Entity, - ExtendedNode, - ExtendedRelationship, - GroupedEntity, - UserCredentials, - chatInfoMessage, -} from '../../../types'; -import { useEffect, useMemo, useState } from 'react'; -import HoverableLink from '../../UI/HoverableLink'; -import GraphViewButton from '../../Graph/GraphViewButton'; -import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo'; -import { useCredentials } from '../../../context/UserCredentials'; -import { calcWordColor } from '@neo4j-devtools/word-color'; -import ReactMarkdown from 'react-markdown'; 
-import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; -import { youtubeLinkValidation } from '../../../utils/Utils'; -const InfoModal: React.FC = ({ sources, model, total_tokens, response_time, chunk_ids }) => { - const [activeTab, setActiveTab] = useState(3); - const [infoEntities, setInfoEntities] = useState([]); - const [loading, setLoading] = useState(false); - const { userCredentials } = useCredentials(); - const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); - const [chunks, setChunks] = useState([]); - const parseEntity = (entity: Entity) => { - const { labels, properties } = entity; - const [label] = labels; - const text = properties.id; - return { label, text }; - }; - useEffect(() => { - setLoading(true); - chunkEntitiesAPI(userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(',')) - .then((response) => { - setInfoEntities(response.data.data.nodes); - setNodes(response.data.data.nodes); - setRelationships(response.data.data.relationships); - const chunks = response.data.data.chunk_data.map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return { - ...chunk, - score: chunkScore?.score, - }; - }); - const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); - setChunks(sortedchunks); - setLoading(false); - }) - .catch((error) => { - console.error('Error fetching entities:', error); - setLoading(false); - }); - }, [chunk_ids]); - const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - return infoEntities.reduce((acc, entity) => { - const { label, text } = parseEntity(entity); - if (!acc[label]) { - const newColor = calcWordColor(label); - acc[label] = { texts: new Set(), color: newColor }; - } - acc[label].texts.add(text); - return acc; - }, {} as Record; color: string }>); - }, [infoEntities]); - const onChangeTabs = (tabId: number) => { - setActiveTab(tabId); - }; - const labelCounts = useMemo(() => { 
- const counts: { [label: string]: number } = {}; - for (let index = 0; index < infoEntities.length; index++) { - const entity = infoEntities[index]; - const { labels } = entity; - const [label] = labels; - counts[label] = counts[label] ? counts[label] + 1 : 1; - } - return counts; - }, [infoEntities]); - const sortedLabels = useMemo(() => { - return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); - }, [labelCounts]); - - const generateYouTubeLink = (url: string, startTime: string) => { - try { - const urlObj = new URL(url); - urlObj.searchParams.set('t', startTime); - return urlObj.toString(); - } catch (error) { - console.error('Invalid URL:', error); - return ''; - } - }; - return ( - - - - - Retrieval information - - To generate this response, in {response_time} seconds we used{' '} - {total_tokens} tokens with the model{' '} - {model}. - - - - - Sources used - Top Entities used - Chunks - - - - {sources.length ? ( -
      - {sources.map((link, index) => { - return ( -
    • - {link?.startsWith('http') || link?.startsWith('https') ? ( - <> - {link?.includes('wikipedia.org') && ( -
      - Wikipedia Logo - - - - {link} - - - -
      - )} - {link?.includes('storage.googleapis.com') && ( -
      - Google Cloud Storage Logo - - {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} - -
      - )} - {link?.startsWith('s3://') && ( -
      - S3 Logo - - {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} - -
      - )} - {youtubeLinkValidation(link) && ( - <> -
      - - - - - {link} - - - -
      - - )} - {!link?.startsWith('s3://') && - !link?.includes('storage.googleapis.com') && - !link?.includes('wikipedia.org') && - !link?.includes('youtube.com') && ( -
      - - - {link} - -
      - )} - - ) : ( -
      - - - {link} - - {/* {chunks?.length > 0 && ( - - - Page{' '} - {chunks - .map((c) => c.page_number as number) - .sort((a, b) => a - b) - .join(', ')} - - )} */} -
      - )} -
    • - ); - })} -
    - ) : ( - No Sources Found - )} -
    - - {loading ? ( - - - - ) : Object.keys(groupedEntities).length > 0 ? ( -
      - {sortedLabels.map((label, index) => ( -
    • -
      - {label} ({labelCounts[label]}) -
      - - {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} - -
    • - ))} -
    - ) : ( - No Entities Found - )} -
    - - {loading ? ( - - - - ) : chunks.length > 0 ? ( -
    -
      - {chunks.map((chunk) => ( -
    • - {chunk?.page_number ? ( - <> -
      - - - {/* {chunk?.fileName}, Page: {chunk?.page_number} */} - {chunk?.fileName} - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.start_time ? ( - <> -
      - - - - {chunk?.fileName} - - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( - <> -
      - - {chunk?.fileName} -
      - - ) : chunk?.url && - !chunk?.url.startsWith('s3://') && - !chunk?.url.includes('storage.googleapis.com') && - !chunk?.url.includes('wikipedia.org') && - !chunk?.url.includes('youtube.com') ? ( - <> -
      - - - {chunk?.url} - -
      - Similarity Score: {chunk?.score} - - ) : ( - <> - )} - {chunk?.text} -
    • - ))} -
    -
    - ) : ( - No Chunks Found - )} -
    -
    - {activeTab == 4 && nodes.length && relationships.length ? ( - - - - ) : ( - <> - )} -
    - ); -}; -export default InfoModal; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 648a86ac6..5862abe05 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -59,7 +59,8 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); - const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus, isGdsActive, setGdsActive } = useCredentials(); + const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus, isGdsActive, setGdsActive } = + useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -477,8 +478,9 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ + userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -488,10 +490,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 
'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -521,12 +523,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.NodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + NodesCount: isStartFromBegining ? 0 : f.NodesCount, + relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, + } : f; }); }); @@ -815,8 +817,9 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 21bd47240..17c97d0bc 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -142,7 +142,7 @@ export default function DropZoneForSmallLayouts() { uploadNextChunk(); }; - const { acceptedFiles, getRootProps, getInputProps } = useDropzone({ + const { getRootProps, getInputProps } = useDropzone({ accept: { 'application/pdf': ['.pdf'], 'image/*': ['.jpeg', '.jpg', '.png', '.svg'], @@ -214,7 +214,6 @@ export default function DropZoneForSmallLayouts() { setFilesData(copiedFilesData); } }; - console.log(acceptedFiles); return ( <>
    diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index 4f344d58c..b335a4324 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -18,12 +18,14 @@ const CheckboxSelection: React.FC = ({ graphType, loading, disabled={loading} onChange={() => handleChange('Entities')} /> - {isgds && ( handleChange('Communities')} - />)} + {isgds && ( + handleChange('Communities')} + /> + )}
); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 2a58adb25..c29ae1c75 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -98,10 +98,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -136,10 +136,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -182,7 +182,7 @@ const GraphViewModal: React.FunctionComponent = ({ useEffect(() => { if (open) { setLoading(true); - setGraphType(intitalGraphType(isGdsActive)) + setGraphType(intitalGraphType(isGdsActive)); if (viewPoint !== 'chatInfoView') { graphApi(); } else { @@ -246,8 +246,8 @@ const GraphViewModal: React.FunctionComponent = ({ match && viewPoint === graphLabels.showGraphView ? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 
50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -264,7 +264,6 @@ const GraphViewModal: React.FunctionComponent = ({ [nodes] ); - // Unmounting the component if (!open) { return <>; @@ -357,8 +356,8 @@ const GraphViewModal: React.FunctionComponent = ({ isActive && viewPoint === graphLabels.showGraphView ? 100 : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -399,10 +398,6 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships(updatedRelations); setNodes(updatedNodes); }; - - console.log('rels', relationships); - - console.log('nodes', nodes); return ( <> { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/Settings/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx index 8ebc15020..914d9580c 100644 --- a/frontend/src/components/Popups/Settings/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -27,7 +27,6 @@ const SchemaFromTextDialog = ({ try { setloading(true); const response = await getNodeLabelsAndRelTypesFromText(model, userText, isSchema); - console.log({ response }); setloading(false); if (response.data.status === 'Success') { if (response.data?.data?.labels.length) { diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index 8530cacfa..1b5eda2da 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -28,7 +28,7 @@ const UserCredentialsWrapper: FunctionComponent = (props) => { setGdsActive, connectionStatus, setConnectionStatus, - } + }; return {props.children}; }; diff --git a/frontend/src/services/ChunkEntitiesInfo.ts 
b/frontend/src/services/ChunkEntitiesInfo.ts index aa133c815..f69ddd3aa 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,13 +1,20 @@ import { ChatInfo_APIResponse, UserCredentials } from '../types'; import api from '../API/Index'; -const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: string) => { +const chunkEntitiesAPI = async ( + userCredentials: UserCredentials, + chunk_ids: string, + database: string = 'neo4j', + is_entity: boolean = false +) => { try { const formData = new FormData(); formData.append('uri', userCredentials?.uri ?? ''); formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); formData.append('chunk_ids', chunk_ids); + formData.append('database', database); + formData.append('is_entity', String(is_entity)); const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 625e90037..d22ce02fb 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -84,7 +84,7 @@ export type UploadParams = { originalname: string; } & { [key: string]: any }; -export type FormDataParams = ExtractParams | UploadParams; +export type ForDataParams = ExtractParams | UploadParams; export interface DropdownProps { onSelect: (option: OptionType | null | void) => void; @@ -223,6 +223,7 @@ export interface Messages { cypher_query?: string; graphonly_entities?: []; error?: string; + entities?: chunk[]; } export type ChatbotProps = { @@ -464,7 +465,13 @@ export type Entity = { id: string; }; }; - +export type Community = { + id: string; + summary: string; + weight: number; + level: number; + community_rank: number; +}; export type GroupedEntity = { texts: Set; color: string; @@ -502,6 +509,7 @@ export interface Chunk { url?: string; fileSource: string; score?: string; + fileType: string; } export interface 
SpeechSynthesisProps { diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index a3f77c134..cf2c2619c 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -36,30 +36,38 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai-gpt-3.5', - 'openai-gpt-4o', - 'openai-gpt-4o-mini', - 'gemini-1.0-pro', - 'gemini-1.5-pro', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_v3p1_405b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai-gpt-3.5', + 'openai-gpt-4o', + 'openai-gpt-4o-mini', + 'gemini-1.0-pro', + 'gemini-1.5-pro', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_v3p1_405b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai-gpt-4o') ? 'openai-gpt-4o' : llms?.includes('gemini-1.0-pro') - ? 'gemini-1.0-pro' - : 'diffbot'; + ? 'gemini-1.0-pro' + : 'diffbot'; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'entity search+vector', 'global community']; + : [ + 'vector', + 'graph', + 'graph+vector', + 'fulltext', + 'graph+vector+fulltext', + 'entity search+vector', + 'global community', + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? 
parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index f332abb6e..7a6283c27 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -1,6 +1,24 @@ import { calcWordColor } from '@neo4j-devtools/word-color'; import type { Relationship } from '@neo4j-nvl/base'; -import { CustomFile, Entity, ExtendedNode, ExtendedRelationship, GraphType, Messages, Scheme, SourceNode, UserCredentials } from '../types'; +import { + CustomFile, + Entity, + ExtendedNode, + ExtendedRelationship, + GraphType, + Messages, + Scheme, + SourceNode, + UserCredentials, +} from '../types'; +import Wikipediadarkmode from '../assets/images/wikipedia-darkmode.svg'; +import Wikipediadlogo from '../assets/images/wikipedia.svg'; +import webdarklogo from '../assets/images/web-darkmode.svg'; +import weblogo from '../assets/images/web.svg'; +import youtubedarklogo from '../assets/images/youtube-darkmode.svg'; +import youtubelightlogo from '../assets/images/youtube-lightmode.svg'; +import s3logo from '../assets/images/s3logo.png'; +import gcslogo from '../assets/images/gcs.webp'; // Get the Url export const url = () => { @@ -172,27 +190,33 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela }; /** -* Filters nodes, relationships, and scheme based on the selected graph types. -* -* @param graphType - An array of graph types to filter by (e.g., 'DocumentChunk', 'Entities', 'Communities'). -* @param allNodes - An array of all nodes present in the graph. -* @param allRelationships - An array of all relationships in the graph. -* @param scheme - The scheme object containing node and relationship information. -* @returns An object containing filtered nodes, relationships, and scheme based on the selected graph types. -*/ + * Filters nodes, relationships, and scheme based on the selected graph types. 
+ * + * @param graphType - An array of graph types to filter by (e.g., 'DocumentChunk', 'Entities', 'Communities'). + * @param allNodes - An array of all nodes present in the graph. + * @param allRelationships - An array of all relationships in the graph. + * @param scheme - The scheme object containing node and relationship information. + * @returns An object containing filtered nodes, relationships, and scheme based on the selected graph types. + */ export const filterData = ( graphType: GraphType[], allNodes: ExtendedNode[], allRelationships: Relationship[], scheme: Scheme, - isGdsActive: boolean, + isGdsActive: boolean ) => { let filteredNodes: ExtendedNode[] = []; let filteredRelations: Relationship[] = []; let filteredScheme: Scheme = {}; - const entityTypes = Object.keys(scheme).filter((type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__'); + const entityTypes = Object.keys(scheme).filter( + (type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__' + ); // Only Document + Chunk - if (graphType.includes('DocumentChunk') && !graphType.includes('Entities') && (!graphType.includes('Communities') || !isGdsActive)) { + if ( + graphType.includes('DocumentChunk') && + !graphType.includes('Entities') && + (!graphType.includes('Communities') || !isGdsActive) + ) { filteredNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') ); @@ -205,7 +229,11 @@ export const filterData = ( ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk }; // Only Entity - } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk') && (!graphType.includes('Communities') || !isGdsActive)) { + } else if ( + graphType.includes('Entities') && + !graphType.includes('DocumentChunk') && + (!graphType.includes('Communities') || !isGdsActive) + ) { const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && 
!node.labels.includes('Chunk')); filteredNodes = entityNodes ? entityNodes : []; const nodeIds = new Set(filteredNodes.map((node) => node.id)); @@ -217,7 +245,12 @@ export const filterData = ( ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; // Only Communities - } else if (graphType.includes('Communities') && !graphType.includes('DocumentChunk') && !graphType.includes('Entities') && isGdsActive) { + } else if ( + graphType.includes('Communities') && + !graphType.includes('DocumentChunk') && + !graphType.includes('Entities') && + isGdsActive + ) { filteredNodes = allNodes.filter((node) => node.labels.includes('__Community__')); const nodeIds = new Set(filteredNodes.map((node) => node.id)); filteredRelations = allRelationships.filter( @@ -226,10 +259,16 @@ export const filterData = ( ); filteredScheme = { __Community__: scheme.__Community__ }; // Document + Chunk + Entity - } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities') && (!graphType.includes('Communities') || !isGdsActive)) { + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Entities') && + (!graphType.includes('Communities') || !isGdsActive) + ) { filteredNodes = allNodes.filter( (node) => - (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') || !node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__') + (node.labels.includes('Document') && node.properties.fileName) || + node.labels.includes('Chunk') || + (!node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__')) ); const nodeIds = new Set(filteredNodes.map((node) => node.id)); filteredRelations = allRelationships.filter( @@ -238,9 +277,18 @@ export const filterData = ( nodeIds.has(rel.from) && nodeIds.has(rel.to) ); - filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk, 
...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) }; + filteredScheme = { + Document: scheme.Document, + Chunk: scheme.Chunk, + ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])), + }; // Entities + Communities - } else if (graphType.includes('Entities') && graphType.includes('Communities') && !graphType.includes('DocumentChunk') && isGdsActive) { + } else if ( + graphType.includes('Entities') && + graphType.includes('Communities') && + !graphType.includes('DocumentChunk') && + isGdsActive + ) { const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); filteredNodes = [...entityNodes, ...communityNodes]; @@ -253,10 +301,15 @@ export const filterData = ( ); filteredScheme = { ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])), - __Community__: scheme.__Community__ + __Community__: scheme.__Community__, }; // Document + Chunk + Communities - } else if (graphType.includes('DocumentChunk') && graphType.includes('Communities') && !graphType.includes('Entities') && isGdsActive) { + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Communities') && + !graphType.includes('Entities') && + isGdsActive + ) { const documentChunkNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') ); @@ -265,13 +318,20 @@ export const filterData = ( const nodeIds = new Set(filteredNodes.map((node) => node.id)); filteredRelations = allRelationships.filter( (rel) => - ['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK', 'IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? '') && + ['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK', 'IN_COMMUNITY', 'PARENT_COMMUNITY'].includes( + rel.caption ?? 
'' + ) && nodeIds.has(rel.from) && nodeIds.has(rel.to) ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk, __Community__: scheme.__Community__ }; // Document + Chunk + Entity + Communities (All types) - } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities') && graphType.includes('Communities') && isGdsActive) { + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Entities') && + graphType.includes('Communities') && + isGdsActive + ) { filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; @@ -349,3 +409,21 @@ export const capitalizeWithPlus = (s: string) => { .map((s) => capitalize(s)) .join('+'); }; +export const getLogo = (mode: string): Record => { + if (mode === 'light') { + return { + Wikipedia: Wikipediadarkmode, + 'web-url': webdarklogo, + 's3 bucket': s3logo, + youtube: youtubedarklogo, + 'gcs bucket': gcslogo, + }; + } + return { + Wikipedia: Wikipediadlogo, + 'web-url': weblogo, + 's3 bucket': s3logo, + youtube: youtubelightlogo, + 'gcs bucket': gcslogo, + }; +}; From 107f065df31dbd4d83d20cd4c8782ec5893d566b Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 16 Sep 2024 12:01:18 +0000 Subject: [PATCH 087/292] removal of unused code --- frontend/src/components/FileTable.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 04e2a8841..f6a0c0c08 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -35,7 +35,7 @@ import { } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; -import { ArrowPathIconSolid, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; +import {MagnifyingGlassCircleIconSolid } from 
'@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; @@ -49,7 +49,7 @@ import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials } = useCredentials(); From 8f0a706062cc8f933df01fb446df79838c482596 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 16 Sep 2024 12:06:17 +0000 Subject: [PATCH 088/292] removed entity label --- backend/src/chunkid_entities.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 89ea069f6..30b1c15a9 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -95,6 +95,10 @@ def remove_duplicate_nodes(nodes,property="element_id"): for node in nodes: element_id = node[property] if element_id not in seen_element_ids: + if "labels" in node.keys(): + labels = set(node["labels"]) + labels.discard("__Entity__") + node["labels"] = list(labels) unique_nodes.append(node) seen_element_ids.add(element_id) From bc09b864fe4ba1b4ad87b8130e675738a6370b9b Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 16 Sep 2024 18:28:27 +0530 Subject: [PATCH 089/292] Added Description to chat mode menu (#743) * added tooltips to chat mode menu * addition of description to menu * 741-tooltips-to-selectOptions * menu 
changes * acommunities name change * close changes * name changes * format fixes --- frontend/src/App.css | 26 +++++-- .../src/components/ChatBot/ChatModeToggle.tsx | 78 +++++++++---------- frontend/src/components/Graph/LegendsChip.tsx | 11 ++- frontend/src/components/Layout/SideNav.tsx | 2 + .../Deduplication/index.tsx | 4 +- frontend/src/components/UI/Menu.tsx | 31 ++++---- frontend/src/types.ts | 2 +- frontend/src/utils/Constants.ts | 51 ++++++++++-- frontend/src/utils/Utils.ts | 21 ++++- 9 files changed, 150 insertions(+), 76 deletions(-) diff --git a/frontend/src/App.css b/frontend/src/App.css index fe285c972..615b8559b 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -121,7 +121,8 @@ height: 55px; object-fit: contain; } -.webImg{ + +.webImg { width: 80px; height: 80px; } @@ -240,9 +241,11 @@ .ndl-widget-content>div { overflow-wrap: break-word } -.word-break{ + +.word-break { word-break: break-word; } + .cellClass { width: 100%; height: 100%; @@ -345,22 +348,28 @@ margin-bottom: 0 !important; } -.node_label__value-container--has-value ,.relationship_label__value-container--has-value{ +.node_label__value-container--has-value, +.relationship_label__value-container--has-value { max-height: 215px; overflow-y: scroll !important; scrollbar-width: thin; } -.entity_extraction_Tab_node_label__value-container--has-value,.entity_extraction_Tab_relationship_label__value-container--has-value{ + +.entity_extraction_Tab_node_label__value-container--has-value, +.entity_extraction_Tab_relationship_label__value-container--has-value { max-height: 100px; overflow-y: scroll !important; scrollbar-width: thin; } -.tablet_entity_extraction_Tab_node_label__value-container--has-value,.tablet_entity_extraction_Tab_relationship_label__value-container--has-value{ + +.tablet_entity_extraction_Tab_node_label__value-container--has-value, +.tablet_entity_extraction_Tab_relationship_label__value-container--has-value { max-height: 80px; overflow-y: scroll !important; 
scrollbar-width: thin; } -.widthunset{ + +.widthunset { width: initial !important; height: initial !important; } @@ -372,4 +381,9 @@ .text-input-container.search-initiated { width: 60dvh; +} + +.custom-menu { + min-width: 250px; + max-width: 300px; } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 6315394af..a0ebe879a 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -1,4 +1,4 @@ -import { StatusIndicator } from '@neo4j-ndl/react'; +import { StatusIndicator, Typography } from '@neo4j-ndl/react'; import { useMemo } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; @@ -22,6 +22,39 @@ export default function ChatModeToggle({ const { setchatMode, chatMode, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('create_communities'); const { isGdsActive } = useCredentials(); + const memoizedChatModes = useMemo(() => { + return isGdsActive && isCommunityAllowed + ? chatModes + : chatModes?.filter((m) => !m.mode.includes('entity search+vector')); + }, [isGdsActive, isCommunityAllowed]); + const menuItems = useMemo(() => { + return memoizedChatModes?.map((m) => ({ + title: ( +
+ + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} + +
+ {m.description} +
+
+ ), + onClick: () => { + setchatMode(m.mode); + closeHandler(); // Close the menu after setting the chat mode + }, + disabledCondition: false, + description: ( + + {chatMode === m.mode && ( + <> + Selected + + )} + + ), + })); + }, [chatMode, memoizedChatModes, setchatMode, closeHandler]); return ( { - if (isGdsActive && isCommunityAllowed) { - return chatModes?.map((m) => { - return { - title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), - onClick: () => { - setchatMode(m); - }, - disabledCondition: false, - description: ( - - {chatMode === m && ( - <> - Selected - - )} - - ), - }; - }); - } - return chatModes - ?.filter((s) => !s.includes('community')) - ?.map((m) => { - return { - title: m.includes('+') ? capitalizeWithPlus(m) : capitalize(m), - onClick: () => { - setchatMode(m); - }, - disabledCondition: false, - description: ( - - {chatMode === m && ( - <> - Selected - - )} - - ), - }; - }); - }, [chatMode, chatModes, isCommunityAllowed, isGdsActive])} + items={menuItems} /> ); } diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 2ab9c7b40..832150fad 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,6 +1,15 @@ import { LegendChipProps } from '../../types'; +import { graphLabels } from '../../utils/Constants'; import Legend from '../UI/Legend'; export const LegendsChip: React.FunctionComponent = ({ scheme, label, type, count, onClick }) => { - return ; + return ( + + ); }; diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index ddc29dd02..bdc43c8be 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -203,6 +203,7 @@ const SideNav: React.FC = ({ {!isChatModalOpen && ( { setchatModeAnchor(e.currentTarget); setshowChatMode(true); @@ -217,6 +218,7 @@ const SideNav: React.FC = ({ closeHandler={() => 
setshowChatMode(false)} menuAnchor={chatModeAnchor} disableBackdrop={true} + anchorPortal={true} > } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index f5a021e30..36fcff3d1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/UI/Menu.tsx b/frontend/src/components/UI/Menu.tsx index 4cfd1e238..62cc25095 100644 --- a/frontend/src/components/UI/Menu.tsx +++ b/frontend/src/components/UI/Menu.tsx @@ -1,6 +1,5 @@ import { Menu } from '@neo4j-ndl/react'; import { Menuitems, Origin } from '../../types'; - export default function CustomMenu({ open, closeHandler, @@ -23,25 +22,29 @@ export default function CustomMenu({ return ( { + closeHandler(); + }} anchorOrigin={anchorOrigin} transformOrigin={transformOrigin} anchorPortal={anchorPortal} anchorEl={MenuAnchor} disableBackdrop={disableBackdrop} + className='custom-menu' > - {items?.map((i, idx) => { - return ( - - ); - })} + {items?.map((i, idx) => ( + { + i.onClick(); + closeHandler(); + }} + disabled={i.disabledCondition} + className={i.isSelected ? 
i.selectedClassName : ''} + description={i.description} + /> + ))} ); } diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d22ce02fb..8b00b1695 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -534,7 +534,7 @@ export interface SettingsModalProps { onClear?: () => void; } export interface Menuitems { - title: string; + title: string | JSX.Element; onClick: () => void; disabledCondition: boolean; description?: string | React.ReactNode; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index cf2c2619c..e5cb806c8 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -1,5 +1,6 @@ import { NvlOptions } from '@neo4j-nvl/base'; import { GraphType, OptionType } from '../types'; +import { getDescriptionForChatMode } from './Utils'; export const document = `+ [docs]`; @@ -28,10 +29,12 @@ export const docChunkEntities = `+[chunks] + collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks //chunks with entities + collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; + export const APP_SOURCES = process.env.VITE_REACT_APP_SOURCES !== '' ? (process.env.VITE_REACT_APP_SOURCES?.split(',') as string[]) : ['gcs', 's3', 'local', 'wiki', 'youtube', 'web']; + export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) @@ -56,18 +59,50 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') : llms?.includes('gemini-1.0-pro') ? 'gemini-1.0-pro' : 'diffbot'; + +// export const chatModes = +// process.env?.VITE_CHAT_MODES?.trim() != '' +// ? process.env.VITE_CHAT_MODES?.split(',') +// : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; + export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' - ? process.env.VITE_CHAT_MODES?.split(',') + ? 
process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - 'vector', - 'graph', - 'graph+vector', - 'fulltext', - 'graph+vector+fulltext', - 'entity search+vector', - 'global community', + { + mode: 'vector', + description: 'Utilizes vector indexing on text chunks to enable semantic similarity search.', + }, + { + mode: 'graph', + description: + 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.', + }, + { + mode: 'graph+vector', + description: + 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.', + }, + { + mode: 'fulltext', + description: + 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.', + }, + { + mode: 'graph+vector+fulltext', + description: + 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.', + }, + { + mode: 'entity search+vector', + description: + 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.', + }, ]; + export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? 
parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 7a6283c27..3eb5f7ab7 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -392,7 +392,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; @@ -409,6 +409,25 @@ export const capitalizeWithPlus = (s: string) => { .map((s) => capitalize(s)) .join('+'); }; + +export const getDescriptionForChatMode = (mode: string): string => { + switch (mode.toLowerCase()) { + case 'vector': + return 'Utilizes vector indexing on text chunks to enable semantic similarity search.'; + case 'graph': + return 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.'; + case 'graph+vector': + return 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.'; + case 'fulltext': + return 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.'; + case 'graph+vector+fulltext': + return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.'; + case 'entity search+vector': + return 'Combines entity node vector indexing with graph connections 
for accurate entity-based search, providing the most relevant response.'; + default: + return 'Chat mode description not available'; // Fallback description + } +}; export const getLogo = (mode: string): Record => { if (mode === 'light') { return { From 337cd257491d698e8f173eb172df5f9a09588576 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:50:48 +0000 Subject: [PATCH 090/292] Update log_struct method to add severity --- backend/score.py | 24 ++++++++++++------------ backend/src/logger.py | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/backend/score.py b/backend/score.py index 8f62b29f1..bd1be1ea7 100644 --- a/backend/score.py +++ b/backend/score.py @@ -115,7 +115,7 @@ async def create_source_knowledge_graph_url( message = f"Source Node created successfully for source type: {source_type} and source: {source}" json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(json_obj) + logger.log_struct(json_obj, "INFO") return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name) except Exception as e: error_message = str(e) @@ -203,7 +203,7 @@ async def extract_knowledge_graph_from_file( result['wiki_query'] = wiki_query result['source_type'] = source_type result['logging_time'] = formatted_time(datetime.now(timezone.utc)) - logger.log_struct({"severity":"INFO","jsonPayload":result}) + logger.log_struct(result, "INFO") extract_api_time = time.time() - start_time logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) @@ -222,7 +222,7 @@ async def extract_knowledge_graph_from_file( logging.info(f'Deleted File Path: {merged_file_path} 
and Deleted File Name : {file_name}') delete_uploaded_local_file(merged_file_path,file_name) json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"ERROR","jsonPayload":json_obj}) + logger.log_struct(json_obj, "ERROR") logging.exception(f'File Failed in extraction: {json_obj}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) finally: @@ -239,7 +239,7 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response("Success",data=result) except Exception as e: job_status = "Failed" @@ -269,7 +269,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success', message='All tasks completed successfully') except Exception as e: @@ -298,7 +298,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), result["info"]["response_time"] = round(total_call_time, 2) json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} - 
logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) except Exception as e: @@ -316,7 +316,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids= logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -344,7 +344,7 @@ async def graph_query( document_names=document_names ) json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success', data=result) except Exception as e: job_status = "Failed" @@ -377,7 +377,7 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()) graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -394,7 +394,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber graph = create_graph_database_connection(uri, userName, password, database) result = await 
asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: @@ -416,7 +416,7 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da result = await asyncio.to_thread(get_labels_and_relationtypes, graph) logging.info(f'Schema result from DB: {result}') json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success', data=result) except Exception as e: message="Unable to get the labels and relationtypes from neo4j database" @@ -485,7 +485,7 @@ async def delete_document_and_entities(uri=Form(), # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 message = f"Deleted {files_list_size} documents with entities from database" json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct({"severity":"INFO","jsonPayload":json_obj}) + logger.log_struct(json_obj, "INFO") return create_api_response('Success',message=message) except Exception as e: job_status = "Failed" diff --git a/backend/src/logger.py b/backend/src/logger.py index ae26a764b..6a9822787 100644 --- a/backend/src/logger.py +++ b/backend/src/logger.py @@ -11,8 +11,8 @@ def __init__(self): else: self.logger = None - def log_struct(self, message): + def log_struct(self, message, severity="DEFAULT"): if self.is_gcp_log_enabled and message is not None: - self.logger.log_struct(message) + 
self.logger.log_struct({"message": message, "severity": severity}) else: - print(message) + print(f"[{severity}]{message}") From dd89e0b25e29c39df968339373d7d883eb78a54d Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:45:59 +0000 Subject: [PATCH 091/292] community check --- frontend/src/components/Graph/GraphViewModal.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index c29ae1c75..617d925a6 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -398,6 +398,8 @@ const GraphViewModal: React.FunctionComponent = ({ setRelationships(updatedRelations); setNodes(updatedNodes); }; + + // const isCommunity = allNodes.some(n=>n.labels.includes('__Community__')); return ( <> = ({ graphType={graphType} loading={loading} handleChange={handleCheckboxChange} - isgds={isGdsActive} + isgds={allNodes.some(n=>n.labels.includes('__Community__'))} /> )} From 6ed968d5936b6a81e831fc717dae92eeabb0bc09 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 17 Sep 2024 09:16:42 +0000 Subject: [PATCH 092/292] Entity Empty Label fix and Icon --- .../src/components/ChatBot/ChatInfoModal.tsx | 84 ++++++++++++------- .../src/components/Graph/GraphViewModal.tsx | 2 +- .../Deduplication/index.tsx | 4 +- frontend/src/utils/Utils.ts | 8 +- 4 files changed, 62 insertions(+), 36 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 2d4381f59..14b4f3bb8 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -57,7 +57,7 @@ const ChatInfoModal: React.FC = ({ }) => { const { breakpoints } = tokens; const isTablet = 
useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [activeTab, setActiveTab] = useState(error.length ? 10 : mode === 'graph' ? 4 : 3); + const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === 'graph' ? 4 : 3); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); @@ -106,8 +106,30 @@ const ChatInfoModal: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } - setInfoEntities(response.data.data.nodes); - setNodes(response.data.data.nodes); + setInfoEntities( + response.data.data.nodes.map((n: Entity) => { + if (!n.labels.length && mode === 'entity search+vector') { + return { + ...n, + labels: ['Entity'], + }; + } + return n; + + }) + ); + setNodes( + response.data.data.nodes.map((n: ExtendedNode) => { + if (!n.labels.length && mode === 'entity search+vector') { + return { + ...n, + labels: ['Entity'], + }; + } + return n; + + }) + ); setRelationships(response.data.data.relationships); setCommunities(response.data.data.community_data); const chunks = response.data.data.chunk_data.map((chunk: any) => { @@ -132,15 +154,17 @@ const ChatInfoModal: React.FC = ({ }, [chunk_ids, mode, error]); const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - return infoEntities.reduce((acc, entity) => { + const items = infoEntities.reduce((acc, entity) => { const { label, text } = parseEntity(entity); if (!acc[label]) { + console.log({ label, text }); const newColor = calcWordColor(label); acc[label] = { texts: new Set(), color: newColor }; } acc[label].texts.add(text); return acc; }, {} as Record; color: string }>); + return items; }, [infoEntities]); const onChangeTabs = (tabId: number) => { @@ -149,7 +173,7 @@ const ChatInfoModal: React.FC = ({ const labelCounts = useMemo(() => { const counts: { [label: string]: number } = {}; - for (let index = 0; index < 
infoEntities.length; index++) { + for (let index = 0; index < infoEntities?.length; index++) { const entity = infoEntities[index]; const { labels } = entity; const [label] = labels; @@ -205,7 +229,7 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === 'graph' && cypher_query?.trim().length ? ( + {mode === 'graph' && cypher_query?.trim()?.length ? ( Generated Cypher Query ) : ( <> @@ -219,10 +243,10 @@ const ChatInfoModal: React.FC = ({ - ) : mode === 'entity search+vector' && chunks.length ? ( + ) : mode === 'entity search+vector' && chunks?.length ? (
    {chunks - .map((c) => ({ fileName: c.fileName, fileSource: c.fileType })) + .map((c) => ({ fileName: c.fileName, fileSource: c.fileSource })) .map((s, index) => { return (
  • @@ -235,7 +259,6 @@ const ChatInfoModal: React.FC = ({ width={20} height={20} className='mr-2' - alt='S3 Logo' /> )} = ({ ); })}
- ) : sources.length ? ( + ) : sources?.length ? (
    {sources.map((link, index) => { return ( @@ -351,7 +374,7 @@ const ChatInfoModal: React.FC = ({ - ) : Object.keys(groupedEntities).length > 0 || Object.keys(graphonly_entities).length > 0 ? ( + ) : Object.keys(groupedEntities)?.length > 0 || Object.keys(graphonly_entities)?.length > 0 ? (
      {mode == 'graph' ? graphonly_entities.map((label, index) => ( @@ -367,26 +390,25 @@ const ChatInfoModal: React.FC = ({
)) - : sortedLabels.map((label, index) => ( -
  • -
    { + const entity = groupedEntities[label == 'undefined' ? 'Entity' : label]; + return ( +
  • - {label} ({labelCounts[label]}) - - - {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} - -
  • - ))} +
    + {label} ({labelCounts[label]}) +
    + + {Array.from(entity.texts).slice(0, 3).join(', ')} + + + ); + })} ) : ( No Entities Found @@ -397,7 +419,7 @@ const ChatInfoModal: React.FC = ({ - ) : chunks.length > 0 ? ( + ) : chunks?.length > 0 ? (
      {chunks.map((chunk) => ( @@ -522,7 +544,7 @@ const ChatInfoModal: React.FC = ({ <> )} - {activeTab == 4 && nodes.length && relationships.length ? ( + {activeTab == 4 && nodes?.length && relationships?.length ? ( diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 617d925a6..50c381685 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -421,7 +421,7 @@ const GraphViewModal: React.FunctionComponent = ({ graphType={graphType} loading={loading} handleChange={handleCheckboxChange} - isgds={allNodes.some(n=>n.labels.includes('__Community__'))} + isgds={allNodes.some((n) => n.labels.includes('__Community__'))} /> )} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 36fcff3d1..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? 
{ ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 3eb5f7ab7..88aabd01c 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -359,8 +359,11 @@ export const capitalize = (word: string): string => { }; export const parseEntity = (entity: Entity) => { const { labels, properties } = entity; - const [label] = labels; + let [label] = labels; const text = properties.id; + if (!label) { + label = 'Entity'; + } return { label, text }; }; @@ -392,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - prev === batchSize ? batchSize - 1 : prev + 1; + (prev === batchSize ? batchSize - 1 : prev + 1); export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; @@ -429,6 +432,7 @@ export const getDescriptionForChatMode = (mode: string): string => { } }; export const getLogo = (mode: string): Record => { + console.log(mode); if (mode === 'light') { return { Wikipedia: Wikipediadarkmode, From 9237847b7880a900fab123fdf76be2e6d6f1499c Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:15:52 +0530 Subject: [PATCH 093/292] Update Utils.ts --- frontend/src/utils/Utils.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 88aabd01c..b59c082fd 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -432,7 +432,6 @@ export const getDescriptionForChatMode = (mode: string): string => { } }; export const getLogo = (mode: string): Record => { - console.log(mode); if (mode === 'light') { 
return { Wikipedia: Wikipediadarkmode, From 5f2290bf9b59d4f98c3555eea2b8bcbed6c955ec Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:30:24 +0530 Subject: [PATCH 094/292] Retry processing - node and rels count update condition for start from beginning (#737) * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Reapply "Dockerfile changes with VITE label" This reverts commit a83e0855fbf54d2b5af009d96c4edf0bcd7ab84a. * Revert "Dockerfile changes with VITE label" This reverts commit 2840ebc9e6156c51465a9f54be72ca2d014147c2. 
* Concurrent processing of files (#665) * Update README.md * Droped the old vector index (#652) * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * Added chatbot "hybrid " mode use case * added the concurrent file processing * page refresh scenario * fixed waiting files processing issue in refresh scenario * removed boolean param * fixed processedCount issue * checkbox with waiting check * fixed the refresh scenario with processing files * processing files check * server side error * processing file count check for processing files less than batch size * processing count check to handle allselected files * created helper functions * code improvements * __ changes (#656) * DiffbotGraphTransformer doesn't need an LLMGraphTransformer (#659) Co-authored-by: jeromechoo * Removed experiments/llm-chatbot-python folder from DEV branch * redcued the password clear timeout * Removed experiments/Cypher_Queries.ipynb file from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * update delete query with entities * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * add BATCH_SIZE to docker * temp fixes for 0.3.1 * alert fix for less than batch size processing * new virtual env * added Hybrid Chat modes (#670) * Rename the function #657 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * Graph node filename check * env fixes with latest nvl libraries * format fixes * removed local files * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without 
embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes 
* chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * page refresh scenario * fixed waiting files processing issue in refresh scenario * Removed experiments/llm-chatbot-python folder from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes 
* env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for 
create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * property spell fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env changes * format fixes * set retry status * retry processing backend * added the retry icon on rows * vite 
changes in docker compose * added retry dialog * Integrated the Retry processing API * Integrated the Extract API fro retry processing * Integrated ndl toast component * replaced foreach with normal for loop for better performance * types improvements * used toast component * spell fix * Issue fixed * processing changes in main * function closing fix * retry processing issue fixed * autoclosing the retry popup on retry api success * removed the retry if check * resetting the node and relationship count on retry * added the enter key events on the popups * fixed wikipedia icon on large file alert popup * setting nodes to 0 and start from last processed chunk logic changes * Retry Popup fixes * status changes for upload failed scenario * kept condition specific * changed status to reprocess from retry * Reprocess wording changes * tooltip changes * wordings and size changes * Changed status to Reprocess * updated node count for start from begnning --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo --- backend/src/main.py | 12 +++++++++--- backend/src/shared/constants.py | 7 ++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index cc5569c0d..eebdff70f 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -5,7 +5,8 @@ QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, START_FROM_BEGINNING, START_FROM_LAST_PROCESSED_POSITION, - DELETE_ENTITIES_AND_START_FROM_BEGINNING) + DELETE_ENTITIES_AND_START_FROM_BEGINNING, + 
QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT) from src.shared.schema_extraction import schema_extraction_from_text from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader from dotenv import load_dotenv @@ -342,8 +343,13 @@ def processing_source(uri, userName, password, database, model, file_name, pages obj_source_node.updated_at = end_time obj_source_node.processing_time = processed_time obj_source_node.processed_chunk = select_chunks_upto+select_chunks_with_retry - obj_source_node.node_count = node_count - obj_source_node.relationship_count = rel_count + if retry_condition == START_FROM_BEGINNING: + result = graph.query(QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT, params={"filename":file_name}) + obj_source_node.node_count = result[0]['nodes'] + obj_source_node.relationship_count = result[0]['rels'] + else: + obj_source_node.node_count = node_count + obj_source_node.relationship_count = rel_count graphDb_data_Access.update_source_node(obj_source_node) result = graphDb_data_Access.get_current_status_document_node(file_name) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 8bb1c56b1..c6d404a7f 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -312,7 +312,12 @@ RETURN c.id as id,c.position as position ORDER BY c.position LIMIT 1 """ - +QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT = """ + MATCH (d:Document)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e) where d.fileName=$filename + OPTIONAL MATCH (d)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e2:!Chunk)-[rel]-(e) + RETURN count(DISTINCT e) as nodes, count(DISTINCT rel) as rels + """ + START_FROM_BEGINNING = "start_from_beginning" DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning" START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position" From fec0d1ec38319e18ba27f0440a4f57c8a4f80316 Mon Sep 17 00:00:00 2001 From: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:06:06 +0000 Subject: [PATCH 095/292] uncommented the Retry Processing --- frontend/src/components/FileTable.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index f6a0c0c08..aa4d9bb9a 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -35,7 +35,7 @@ import { } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; -import {MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; +import { ArrowPathIconSolid, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; @@ -49,7 +49,7 @@ import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials } = useCredentials(); @@ -146,7 +146,7 @@ const FileTable = forwardRef((props, ref) => { > {info.getValue()} - {/* {(info.getValue() === 'Completed' || + {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && ( @@ -161,7 +161,7 @@ const FileTable = forwardRef((props, ref) => { - )} */} + )}
    ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { From dff7535b1001233e0a0b62a2162f36f45930657f Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:30:01 +0000 Subject: [PATCH 096/292] removed __Entity__ labels --- backend/src/chunkid_entities.py | 8 ++++++++ backend/src/shared/constants.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 30b1c15a9..53a0544f6 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -19,10 +19,18 @@ def process_records(records): relationship = element['relationship'] if start_node['element_id'] not in seen_nodes: + if "labels" in start_node.keys(): + labels = set(start_node["labels"]) + labels.discard("__Entity__") + start_node["labels"] = list(labels) nodes.append(start_node) seen_nodes.add(start_node['element_id']) if end_node['element_id'] not in seen_nodes: + if "labels" in end_node.keys(): + labels = set(end_node["labels"]) + labels.discard("__Entity__") + end_node["labels"] = list(labels) nodes.append(end_node) seen_nodes.add(end_node['element_id']) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 4ec88e8a5..8a1d2bba2 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -74,7 +74,7 @@ MATCH (chunk)-[:PART_OF]->(d:Document) CALL {WITH chunk MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document) +MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document &! 
`__Community__`) UNWIND rels as r RETURN collect(distinct r) as rels } From e3bcf2be1a29d36b00162019c028f875e660aca8 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 17 Sep 2024 12:09:48 +0000 Subject: [PATCH 097/292] spell fix --- frontend/src/services/CommonAPI.ts | 2 +- frontend/src/types.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/services/CommonAPI.ts b/frontend/src/services/CommonAPI.ts index b55d002fe..d1d5c767b 100644 --- a/frontend/src/services/CommonAPI.ts +++ b/frontend/src/services/CommonAPI.ts @@ -1,5 +1,5 @@ import { AxiosResponse, Method } from 'axios'; -import { UserCredentials, FormDataParams } from '../types'; +import { UserCredentials, FormDataParams } from '../types'; import api from '../API/Index'; // API Call diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 8b00b1695..679756dd9 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -84,7 +84,7 @@ export type UploadParams = { originalname: string; } & { [key: string]: any }; -export type ForDataParams = ExtractParams | UploadParams; +export type FormDataParams = ExtractParams | UploadParams; export interface DropdownProps { onSelect: (option: OptionType | null | void) => void; From a86dd12e903c855a502bfd266ac7657f854e6fbf Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 18 Sep 2024 06:52:12 +0000 Subject: [PATCH 098/292] fixed postprocessing method invoking issue for odd no files --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 10 ++++------ frontend/src/components/Content.tsx | 7 +++++-- frontend/src/services/CommonAPI.ts | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 14b4f3bb8..758d793da 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ 
b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -113,9 +113,8 @@ const ChatInfoModal: React.FC = ({ ...n, labels: ['Entity'], }; - } - return n; - + } + return n; }) ); setNodes( @@ -125,9 +124,8 @@ const ChatInfoModal: React.FC = ({ ...n, labels: ['Entity'], }; - } - return n; - + } + return n; }) ); setRelationships(response.data.data.relationships); diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 5862abe05..fb11e3fa0 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -145,7 +145,10 @@ const Content: React.FC = ({ if (processedCount == batchSize) { handleGenerateGraph([], true); } - }, [processedCount, userCredentials]); + if (processedCount === 1 && queue.isEmpty()) { + (async () => await postProcessing(userCredentials as UserCredentials, postProcessingTasks))(); + } + }, [processedCount, userCredentials, queue]); useEffect(() => { if (afterFirstRender) { @@ -363,7 +366,7 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + (async () => await postProcessing(userCredentials as UserCredentials, postProcessingTasks))(); } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; diff --git a/frontend/src/services/CommonAPI.ts b/frontend/src/services/CommonAPI.ts index d1d5c767b..b55d002fe 100644 --- a/frontend/src/services/CommonAPI.ts +++ b/frontend/src/services/CommonAPI.ts @@ -1,5 +1,5 @@ import { AxiosResponse, Method } from 'axios'; -import { UserCredentials, FormDataParams } from '../types'; +import { UserCredentials, FormDataParams } from '../types'; import api from '../API/Index'; // API Call From ba0957efc3265116c3dbad9683ffee2bd5d2b828 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 18 Sep 2024 06:57:26 
+0000 Subject: [PATCH 099/292] lint fix --- frontend/src/components/Content.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index fb11e3fa0..f76e45cb4 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -146,7 +146,9 @@ const Content: React.FC = ({ handleGenerateGraph([], true); } if (processedCount === 1 && queue.isEmpty()) { - (async () => await postProcessing(userCredentials as UserCredentials, postProcessingTasks))(); + (async () => { + await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + })(); } }, [processedCount, userCredentials, queue]); @@ -366,7 +368,7 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { - (async () => await postProcessing(userCredentials as UserCredentials, postProcessingTasks))(); + await postProcessing(userCredentials as UserCredentials, postProcessingTasks); } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; From 3fecf70e5f6139e0631299358fe3c025a23ba365 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 18 Sep 2024 07:25:36 +0000 Subject: [PATCH 100/292] Added filesource and name in chunks --- .../src/components/ChatBot/ChatInfoModal.tsx | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 758d793da..192b37407 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -489,14 +489,31 @@ const ChatInfoModal: React.FC = ({ Similarity Score: {chunk?.score} - ) : chunk.fileSource === 'local file' ? ( + ) : ( <> - Similarity Score: {chunk?.score} +
    + {chunk.fileSource === 'local file' ? ( + + ) : ( + + )} + + {chunk.fileName} + +
    - ) : ( - <> )} - {chunk?.text} + + Text: {chunk?.text} + ))} From 57550b787b2e8a1b56715169e8b6a46ac02d319e Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:03:15 +0000 Subject: [PATCH 101/292] Preload=True remove from HSTS --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index bd1be1ea7..cecc42778 100644 --- a/backend/score.py +++ b/backend/score.py @@ -48,7 +48,7 @@ def sick(): app = FastAPI() SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -app.add_middleware(HSTS, Option={'max-age': 4, 'preload': True}) +app.add_middleware(HSTS, Option={'max-age': 4}) app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) From 1695f19c8802c7d26e0aacd3793595f8ada5c884 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 18 Sep 2024 18:41:07 +0530 Subject: [PATCH 102/292] Graph communities (#748) * UI changes * modes enable disable * separated sources entities chunk communities * communities added into separate component * Update ChatInfoModal.tsx * added filename and source for chunksinfo * removed the console.log * mode disable changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/src/chunkid_entities.py | 3 + frontend/src/App.css | 2 +- .../src/components/ChatBot/ChatInfoModal.tsx | 413 ++---------------- .../src/components/ChatBot/ChatModeToggle.tsx | 64 +-- frontend/src/components/ChatBot/ChunkInfo.tsx | 127 ++++++ .../src/components/ChatBot/Communities.tsx | 36 ++ .../src/components/ChatBot/EntitiesInfo.tsx | 91 ++++ 
.../src/components/ChatBot/SourcesInfo.tsx | 139 ++++++ frontend/src/types.ts | 24 + frontend/src/utils/Utils.ts | 11 + 10 files changed, 508 insertions(+), 402 deletions(-) create mode 100644 frontend/src/components/ChatBot/ChunkInfo.tsx create mode 100644 frontend/src/components/ChatBot/Communities.tsx create mode 100644 frontend/src/components/ChatBot/EntitiesInfo.tsx create mode 100644 frontend/src/components/ChatBot/SourcesInfo.tsx diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 53a0544f6..3fd3c3a71 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -135,6 +135,9 @@ def process_entityids(driver, chunk_ids): logging.info(f"Nodes and relationships are processed") result["chunk_data"] = records[0]["chunks"] result["community_data"] = records[0]["communities"] + else: + result["chunk_data"] = list() + result["community_data"] = list() logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") return result except Exception as e: diff --git a/frontend/src/App.css b/frontend/src/App.css index 615b8559b..e912a05e2 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -385,5 +385,5 @@ .custom-menu { min-width: 250px; - max-width: 300px; + max-width: 305px; } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 192b37407..82fa25e26 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -1,48 +1,36 @@ import { Box, Typography, - TextLink, Flex, Tabs, - LoadingSpinner, CypherCodeBlock, CypherCodeBlockProps, useCopyToClipboard, Banner, useMediaQuery, } from '@neo4j-ndl/react'; -import { - DocumentDuplicateIconOutline, - DocumentTextIconOutline, - ClipboardDocumentCheckIconOutline, - GlobeAltIconOutline, -} from '@neo4j-ndl/react/icons'; +import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline 
} from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; -import wikipedialogo from '../../assets/images/wikipedia.svg'; -import youtubelogo from '../../assets/images/youtube.svg'; -import gcslogo from '../../assets/images/gcs.webp'; -import s3logo from '../../assets/images/s3logo.png'; import { Chunk, Community, Entity, ExtendedNode, ExtendedRelationship, - GroupedEntity, UserCredentials, chatInfoMessage, } from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; -import HoverableLink from '../UI/HoverableLink'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import { calcWordColor } from '@neo4j-devtools/word-color'; -import ReactMarkdown from 'react-markdown'; -import { getLogo, parseEntity, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; import { tokens } from '@neo4j-ndl/base'; +import ChunkInfo from './ChunkInfo'; +import EntitiesInfo from './EntitiesInfo'; +import SourcesInfo from './SourcesInfo'; +import CommunitiesInfo from './Communities'; const ChatInfoModal: React.FC = ({ sources, @@ -106,8 +94,13 @@ const ChatInfoModal: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } + const nodesData = response?.data?.data?.nodes; + const relationshipsData = response?.data?.data?.relationships; + const communitiesData = response?.data?.data?.community_data; + const chunksData = response?.data?.data?.chunk_data; + setInfoEntities( - response.data.data.nodes.map((n: Entity) => { + nodesData.map((n: Entity) => { if (!n.labels.length && mode === 'entity search+vector') { return { ...n, @@ -118,30 +111,34 @@ const ChatInfoModal: React.FC = ({ }) ); setNodes( - response.data.data.nodes.map((n: ExtendedNode) 
=> { + nodesData.map((n: ExtendedNode) => { if (!n.labels.length && mode === 'entity search+vector') { return { ...n, labels: ['Entity'], }; } - return n; + return n ?? []; }) ); - setRelationships(response.data.data.relationships); - setCommunities(response.data.data.community_data); - const chunks = response.data.data.chunk_data.map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return { - ...chunk, - score: chunkScore?.score, - }; - }); - const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); - setChunks(sortedchunks); + setRelationships(relationshipsData ?? []); + setCommunities(communitiesData ?? []); + setChunks( + chunksData + .map((chunk: any) => { + const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); + return ( + { + ...chunk, + score: chunkScore?.score, + } ?? [] + ); + }) + .sort((a: any, b: any) => b.score - a.score) + ); setLoading(false); } catch (error) { - console.error('Error fetching entities:', error); + console.error('Error fetching information:', error); setLoading(false); } })(); @@ -151,50 +148,10 @@ const ChatInfoModal: React.FC = ({ }; }, [chunk_ids, mode, error]); - const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - const items = infoEntities.reduce((acc, entity) => { - const { label, text } = parseEntity(entity); - if (!acc[label]) { - console.log({ label, text }); - const newColor = calcWordColor(label); - acc[label] = { texts: new Set(), color: newColor }; - } - acc[label].texts.add(text); - return acc; - }, {} as Record; color: string }>); - return items; - }, [infoEntities]); - const onChangeTabs = (tabId: number) => { setActiveTab(tabId); }; - const labelCounts = useMemo(() => { - const counts: { [label: string]: number } = {}; - for (let index = 0; index < infoEntities?.length; index++) { - const entity = infoEntities[index]; - const { labels } = entity; - const [label] = labels; - counts[label] = 
counts[label] ? counts[label] + 1 : 1; - } - return counts; - }, [infoEntities]); - - const sortedLabels = useMemo(() => { - return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); - }, [labelCounts]); - - const generateYouTubeLink = (url: string, startTime: string) => { - try { - const urlObj = new URL(url); - urlObj.searchParams.set('t', startTime); - return urlObj.toString(); - } catch (error) { - console.error('Invalid URL:', error); - return ''; - } - }; - return ( @@ -237,290 +194,18 @@ const ChatInfoModal: React.FC = ({ )} - {loading ? ( - - - - ) : mode === 'entity search+vector' && chunks?.length ? ( -
      - {chunks - .map((c) => ({ fileName: c.fileName, fileSource: c.fileSource })) - .map((s, index) => { - return ( -
    • -
      - {s.fileSource === 'local file' ? ( - - ) : ( - - )} - - {s.fileName} - -
      -
    • - ); - })} -
    - ) : sources?.length ? ( -
      - {sources.map((link, index) => { - return ( -
    • - {link?.startsWith('http') || link?.startsWith('https') ? ( - <> - {link?.includes('wikipedia.org') && ( -
      - Wikipedia Logo - - - - {link} - - - -
      - )} - {link?.includes('storage.googleapis.com') && ( -
      - Google Cloud Storage Logo - - {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} - -
      - )} - {youtubeLinkValidation(link) && ( - <> -
      - - - - - {link} - - - -
      - - )} - {!link?.startsWith('s3://') && - !link?.includes('storage.googleapis.com') && - !link?.includes('wikipedia.org') && - !link?.includes('youtube.com') && ( -
      - - - {link} - -
      - )} - - ) : link?.startsWith('s3://') ? ( -
      - S3 Logo - - {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} - -
      - ) : ( -
      - - - {link} - -
      - )} -
    • - ); - })} -
    - ) : ( - No Sources Found - )} +
    - {loading ? ( - - - - ) : Object.keys(groupedEntities)?.length > 0 || Object.keys(graphonly_entities)?.length > 0 ? ( -
      - {mode == 'graph' - ? graphonly_entities.map((label, index) => ( -
    • -
      - { - // @ts-ignore - label[Object.keys(label)[0]].id ?? Object.keys(label)[0] - } -
      -
    • - )) - : sortedLabels.map((label, index) => { - const entity = groupedEntities[label == 'undefined' ? 'Entity' : label]; - return ( -
    • -
      - {label} ({labelCounts[label]}) -
      - - {Array.from(entity.texts).slice(0, 3).join(', ')} - -
    • - ); - })} -
    - ) : ( - No Entities Found - )} +
    - {loading ? ( - - - - ) : chunks?.length > 0 ? ( -
    -
      - {chunks.map((chunk) => ( -
    • - {chunk?.page_number ? ( - <> -
      - - - {/* {chunk?.fileName}, Page: {chunk?.page_number} */} - {chunk?.fileName} - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.start_time ? ( - <> -
      - - - - {chunk?.fileName} - - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && - !chunk?.url.startsWith('s3://') && - !chunk?.url.includes('storage.googleapis.com') && - !chunk?.url.includes('wikipedia.org') && - !chunk?.url.includes('youtube.com') ? ( - <> -
      - - - {chunk?.url} - -
      - Similarity Score: {chunk?.score} - - ) : ( - <> -
      - {chunk.fileSource === 'local file' ? ( - - ) : ( - - )} - - {chunk.fileName} - -
      - - )} - - Text: {chunk?.text} - -
    • - ))} -
    -
    - ) : ( - No Chunks Found - )} +
    = ({ /> {mode === 'entity search+vector' ? ( - - {loading ? ( - - - - ) : ( -
    -
      - {communities.map((community, index) => ( -
    • -
      - - ID : - {community.id} - - {community.summary} -
      -
    • - ))} -
    -
    - )} + + ) : ( <> diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index a0ebe879a..293706910 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -19,7 +19,7 @@ export default function ChatModeToggle({ anchorPortal?: boolean; disableBackdrop?: boolean; }) { - const { setchatMode, chatMode, postProcessingTasks } = useFileContext(); + const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('create_communities'); const { isGdsActive } = useCredentials(); const memoizedChatModes = useMemo(() => { @@ -27,34 +27,44 @@ export default function ChatModeToggle({ ? chatModes : chatModes?.filter((m) => !m.mode.includes('entity search+vector')); }, [isGdsActive, isCommunityAllowed]); + + const menuItems = useMemo(() => { - return memoizedChatModes?.map((m) => ({ - title: ( -
    - - {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} - + return memoizedChatModes?.map((m) => { + const isDisabled = Boolean(selectedRows.length && !(m.mode === 'vector' || m.mode === 'graph+vector')); + return { + title: (
    - {m.description} + + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} + +
    + {m.description} +
    -
    - ), - onClick: () => { - setchatMode(m.mode); - closeHandler(); // Close the menu after setting the chat mode - }, - disabledCondition: false, - description: ( - - {chatMode === m.mode && ( - <> - Selected - - )} - - ), - })); - }, [chatMode, memoizedChatModes, setchatMode, closeHandler]); + ), + onClick: () => { + setchatMode(m.mode); + closeHandler(); + }, + disabledCondition: isDisabled, + description: ( + + {chatMode === m.mode && ( + <> + Selected + + )} + {isDisabled && ( + <> + Chatmode not available + + )} + + ), + }; + }); + }, [chatMode, memoizedChatModes, setchatMode, closeHandler, selectedRows]); return ( = ({ loading, chunks }) => { + const themeUtils = useContext(ThemeWrapperContext); + + return ( + <> + {loading ? ( + + + + ) : chunks?.length > 0 ? ( +
    +
      + {chunks.map((chunk) => ( +
    • + {chunk?.page_number ? ( + <> +
      + + + {chunk?.fileName} + +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.start_time ? ( + <> +
      + + + + {chunk?.fileName} + + +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && + !chunk?.url.startsWith('s3://') && + !chunk?.url.includes('storage.googleapis.com') && + !chunk?.url.includes('wikipedia.org') && + !chunk?.url.includes('youtube.com') ? ( + <> +
      + + + {chunk?.url} + +
      + Similarity Score: {chunk?.score} + + ) : ( + <> +
      + {chunk.fileSource === 'local file' ? ( + + ) : ( + + )} + + {chunk.fileName} + +
      + + )} + {chunk?.text} +
    • + ))} +
    +
    + ) : ( + No Chunks Found + )} + + ); +}; + +export default ChunkInfo; diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx new file mode 100644 index 000000000..9b530fd0f --- /dev/null +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -0,0 +1,36 @@ +import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; +import { FC } from 'react'; +import ReactMarkdown from 'react-markdown'; +import { CommunitiesProps } from '../../types'; + +const CommunitiesInfo: FC = ({ loading, communities }) => { + return ( + <> + {loading ? ( + + + + ) : communities?.length > 0 ? ( +
    +
      + {communities.map((community, index) => ( +
    • +
      + + ID : + {community.id} + + {community.summary} +
      +
    • + ))} +
    +
    + ) : ( + No Communities Found + )} + + ); +}; + +export default CommunitiesInfo; diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx new file mode 100644 index 000000000..6c6e0784e --- /dev/null +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -0,0 +1,91 @@ +import { Box, GraphLabel, LoadingSpinner, Typography } from '@neo4j-ndl/react'; +import { FC, useMemo } from 'react'; +import { EntitiesProps, GroupedEntity } from '../../types'; +import { calcWordColor } from '@neo4j-devtools/word-color'; +import { graphLabels } from '../../utils/Constants'; +import { parseEntity } from '../../utils/Utils'; + +const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, infoEntities }) => { + const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { + const items = infoEntities.reduce((acc, entity) => { + const { label, text } = parseEntity(entity); + if (!acc[label]) { + const newColor = calcWordColor(label); + acc[label] = { texts: new Set(), color: newColor }; + } + acc[label].texts.add(text); + return acc; + }, {} as Record; color: string }>); + return items; + }, [infoEntities]); + + const labelCounts = useMemo(() => { + const counts: { [label: string]: number } = {}; + for (let index = 0; index < infoEntities?.length; index++) { + const entity = infoEntities[index]; + const { labels } = entity; + const [label] = labels; + counts[label] = counts[label] ? counts[label] + 1 : 1; + } + return counts; + }, [infoEntities]); + + const sortedLabels = useMemo(() => { + return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); + }, [labelCounts]); + return ( + <> + {loading ? ( + + + + ) : Object.keys(groupedEntities)?.length > 0 || Object.keys(graphonly_entities)?.length > 0 ? ( +
      + {mode == 'graph' + ? graphonly_entities.map((label, index) => ( +
    • +
      + { + // @ts-ignore + label[Object.keys(label)[0]].id ?? Object.keys(label)[0] + } +
      +
    • + )) + : sortedLabels.map((label, index) => { + const entity = groupedEntities[label == 'undefined' ? 'Entity' : label]; + return ( +
    • + e.preventDefault()} + > + {label === '__Community__' ? graphLabels.community : label} ({labelCounts[label]}) + + + {Array.from(entity.texts).slice(0, 3).join(', ')} + +
    • + ); + })} +
    + ) : ( + No Entities Found + )} + + ); +}; + +export default EntitiesInfo; diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx new file mode 100644 index 000000000..fd5d89949 --- /dev/null +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -0,0 +1,139 @@ +import { FC, useContext } from 'react'; +import { SourcesProps } from '../../types'; +import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; +import { getLogo, youtubeLinkValidation } from '../../utils/Utils'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import HoverableLink from '../UI/HoverableLink'; +import wikipedialogo from '../../assets/images/wikipedia.svg'; +import youtubelogo from '../../assets/images/youtube.svg'; +import gcslogo from '../../assets/images/gcs.webp'; +import s3logo from '../../assets/images/s3logo.png'; + +const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { + const themeUtils = useContext(ThemeWrapperContext); + return ( + <> + {loading ? ( + + + + ) : mode === 'entity search+vector' && chunks?.length ? ( +
      + {chunks + .map((c) => ({ fileName: c.fileName, fileSource: c.fileSource })) + .map((s, index) => { + return ( +
    • +
      + {s.fileSource === 'local file' ? ( + + ) : ( + + )} + + {s.fileName} + +
      +
    • + ); + })} +
    + ) : sources?.length ? ( +
      + {sources.map((link, index) => { + return ( +
    • + {link?.startsWith('http') || link?.startsWith('https') ? ( + <> + {link?.includes('wikipedia.org') && ( +
      + Wikipedia Logo + + + + {link} + + + +
      + )} + {link?.includes('storage.googleapis.com') && ( +
      + Google Cloud Storage Logo + + {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} + +
      + )} + {youtubeLinkValidation(link) && ( + <> +
      + + + + + {link} + + + +
      + + )} + {!link?.startsWith('s3://') && + !link?.includes('storage.googleapis.com') && + !link?.includes('wikipedia.org') && + !link?.includes('youtube.com') && ( +
      + + + {link} + +
      + )} + + ) : link?.startsWith('s3://') ? ( +
      + S3 Logo + + {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} + +
      + ) : ( +
      + + + {link} + +
      + )} +
    • + ); + })} +
    + ) : ( + No Sources Found + )} + + ); +}; + +export default SourcesInfo; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 679756dd9..9631335a9 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -644,3 +644,27 @@ export interface DatabaseStatusProps { isGdsActive: boolean; uri: string | null; } + +export type SourcesProps = { + loading: boolean; + mode: string; + sources: string[]; + chunks: Chunk[]; +}; + +export type ChunkProps = { + loading: boolean; + chunks: Chunk[]; +}; + +export type EntitiesProps = { + loading: boolean; + mode: string; + graphonly_entities: []; + infoEntities: Entity[]; +}; + +export type CommunitiesProps = { + loading: boolean; + communities: Community[]; +}; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index b59c082fd..82a7cda3e 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -449,3 +449,14 @@ export const getLogo = (mode: string): Record => { 'gcs bucket': gcslogo, }; }; + +export const generateYouTubeLink = (url: string, startTime: string) => { + try { + const urlObj = new URL(url); + urlObj.searchParams.set('t', startTime); + return urlObj.toString(); + } catch (error) { + console.error('Invalid URL:', error); + return ''; + } +}; From f0810c7da834466d5878e55f4784d2f5a7cb51b8 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:17:43 +0000 Subject: [PATCH 103/292] aria label addition --- .../GraphEnhancementDialog/PostProcessingCheckList/index.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 0051fefbf..28114ced7 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ 
b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -45,6 +45,7 @@ export default function PostProcessingCheckList() { } }} disabled={isCreateCommunities && !isGdsActive} + aria-label='checkbox-postProcessing' /> {job.description}
    From 869c87e4185d904eb81dccf5794bf7402894bb3a Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 19 Sep 2024 05:10:32 +0000 Subject: [PATCH 104/292] code improvements used URL class for host url check --- frontend/src/components/ChatBot/ChatModeToggle.tsx | 3 +-- frontend/src/components/ChatBot/ChunkInfo.tsx | 10 ++++------ frontend/src/utils/Utils.ts | 8 ++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 293706910..d8f1f99c9 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => { }, + closeHandler = () => {}, open, anchorPortal = true, disableBackdrop = false, @@ -28,7 +28,6 @@ export default function ChatModeToggle({ : chatModes?.filter((m) => !m.mode.includes('entity search+vector')); }, [isGdsActive, isCommunityAllowed]); - const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean(selectedRows.length && !(m.mode === 'vector' || m.mode === 'graph+vector')); diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index eb7b435bb..350b41b98 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -7,7 +7,7 @@ import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; import s3logo from '../../assets/images/s3logo.png'; import ReactMarkdown from 'react-markdown'; -import { generateYouTubeLink, getLogo } from '../../utils/Utils'; +import { generateYouTubeLink, getLogo, isAllowedHost } from 
'../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; const ChunkInfo: FC = ({ loading, chunks }) => { @@ -52,7 +52,7 @@ const ChunkInfo: FC = ({ loading, chunks }) => { Similarity Score: {chunk?.score} - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( + ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( <>
    @@ -60,7 +60,7 @@ const ChunkInfo: FC = ({ loading, chunks }) => {
    Similarity Score: {chunk?.score} - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( + ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( <>
    @@ -78,9 +78,7 @@ const ChunkInfo: FC = ({ loading, chunks }) => { ) : chunk?.url && !chunk?.url.startsWith('s3://') && - !chunk?.url.includes('storage.googleapis.com') && - !chunk?.url.includes('wikipedia.org') && - !chunk?.url.includes('youtube.com') ? ( + !isAllowedHost(chunk?.url, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) ? ( <>
    diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 82a7cda3e..50ce37c3e 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -460,3 +460,11 @@ export const generateYouTubeLink = (url: string, startTime: string) => { return ''; } }; +export function isAllowedHost(url: string, allowedHosts: string[]) { + try { + const parsedUrl = new URL(url); + return allowedHosts.includes(parsedUrl.host); + } catch (e) { + return false; + } +} From b2097e5a1d5acd5cc10ff4104364a1fae24904b9 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 19 Sep 2024 05:17:39 +0000 Subject: [PATCH 105/292] host level check --- frontend/src/components/ChatBot/SourcesInfo.tsx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index fd5d89949..91fee511f 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -2,7 +2,7 @@ import { FC, useContext } from 'react'; import { SourcesProps } from '../../types'; import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; -import { getLogo, youtubeLinkValidation } from '../../utils/Utils'; +import { getLogo, isAllowedHost, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; import HoverableLink from '../UI/HoverableLink'; import wikipedialogo from '../../assets/images/wikipedia.svg'; @@ -49,7 +49,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => {
  • {link?.startsWith('http') || link?.startsWith('https') ? ( <> - {link?.includes('wikipedia.org') && ( + {isAllowedHost(link, ['wikipedia.org']) && (
    Wikipedia Logo @@ -64,7 +64,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => {
    )} - {link?.includes('storage.googleapis.com') && ( + {isAllowedHost(link, ['storage.googleapis.com']) && (
    Google Cloud Storage Logo = ({ loading, mode, chunks, sources }) => { )} {!link?.startsWith('s3://') && - !link?.includes('storage.googleapis.com') && - !link?.includes('wikipedia.org') && - !link?.includes('youtube.com') && ( + !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) && (
    From 3819118bfd40e815944db85beca9f54a9f68415f Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 19 Sep 2024 06:18:43 +0000 Subject: [PATCH 106/292] Update Security header --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index cecc42778..74d22759f 100644 --- a/backend/score.py +++ b/backend/score.py @@ -49,7 +49,7 @@ def sick(): app = FastAPI() SecWeb(app=app, Option={'referrer': False, 'xframe': False}) app.add_middleware(HSTS, Option={'max-age': 4}) -app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': [], 'clearSiteData': {'cache': False, 'storage': False}, 'cacheControl': {'public': False, 's-maxage': 0}}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) From 1246b41c80276dbf20035f5abae99a035dbb718c Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 19 Sep 2024 07:06:00 +0000 Subject: [PATCH 107/292] encryption of localstorage values --- frontend/src/components/Content.tsx | 10 ++++++++-- .../Popups/ConnectionModal/ConnectionModal.tsx | 6 ++++-- frontend/src/services/CancelAPI.ts | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index f76e45cb4..517ada241 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -111,7 +111,7 @@ const Content: React.FC = ({ setUserCredentials({ uri: neo4jConnection.uri, userName: neo4jConnection.user, - password: 
neo4jConnection.password, + password: atob(neo4jConnection.password), database: neo4jConnection.database, port: neo4jConnection.uri.split(':')[2], }); @@ -172,13 +172,19 @@ const Content: React.FC = ({ (async () => { const parsedData = JSON.parse(connection); console.log(parsedData.uri); - const response = await connectAPI(parsedData.uri, parsedData.user, parsedData.password, parsedData.database); + const response = await connectAPI( + parsedData.uri, + parsedData.user, + atob(parsedData.password), + parsedData.database + ); if (response?.data?.status === 'Success') { localStorage.setItem( 'neo4j.connection', JSON.stringify({ ...parsedData, userDbVectorIndex: response.data.data.db_vector_dimension, + password: btoa(atob(parsedData.password)), }) ); if ( diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 6b0a986dc..0213e9502 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -84,7 +84,7 @@ export default function ConnectionModal({ JSON.stringify({ uri: usercredential?.uri, user: usercredential?.userName, - password: usercredential?.password, + password: btoa(usercredential?.password), database: usercredential?.database, userDbVectorIndex: 384, }) @@ -189,6 +189,7 @@ export default function ConnectionModal({ setMessage({ type: 'danger', content: 'Please drop a valid file' }); } } catch (err: any) { + console.log({ err }); setMessage({ type: 'danger', content: err.message }); } } @@ -213,7 +214,7 @@ export default function ConnectionModal({ JSON.stringify({ uri: connectionURI, user: username, - password: password, + password: btoa(password), database: database, userDbVectorIndex, isgdsActive, @@ -262,6 +263,7 @@ export default function ConnectionModal({ } } } catch (error) { + console.log({ error }); setIsLoading(false); if (error instanceof Error) { setMessage({ 
type: 'danger', content: error.message }); diff --git a/frontend/src/services/CancelAPI.ts b/frontend/src/services/CancelAPI.ts index de3ea9ba0..f491f06e0 100644 --- a/frontend/src/services/CancelAPI.ts +++ b/frontend/src/services/CancelAPI.ts @@ -9,7 +9,7 @@ const cancelAPI = async (filenames: string[], source_types: string[]) => { formData.append('uri', credentials?.uri ?? ''); formData.append('database', credentials?.database ?? ''); formData.append('userName', credentials?.user ?? ''); - formData.append('password', credentials?.password ?? ''); + formData.append('password', atob(credentials?.password) ?? ''); } formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); From 1709a162eb2af7d8fe02f128011c4182d50fb932 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey Date: Thu, 19 Sep 2024 17:11:01 +0530 Subject: [PATCH 108/292] 'mode-selection-changes' --- backend/score.py | 5 --- .../src/components/ChatBot/ChatModeToggle.tsx | 34 +++++++++++++++---- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/backend/score.py b/backend/score.py index e47905363..e87fc618e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -48,11 +48,6 @@ def sick(): return False app = FastAPI() -SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -app.add_middleware(HSTS, Option={'max-age': 4, 'preload': True}) -app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) -app.add_middleware(XContentTypeOptions) -app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware( CORSMiddleware, diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index d8f1f99c9..944157f1e 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -1,5 
+1,5 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; -import { useMemo } from 'react'; +import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; import { chatModes } from '../../utils/Constants'; @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -22,15 +22,31 @@ export default function ChatModeToggle({ const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('create_communities'); const { isGdsActive } = useCredentials(); + + useEffect(() => { + if (selectedRows.length !== 0) { + setchatMode('graph+vector'); + } else { + setchatMode('graph+vector+fulltext'); + } + }, [selectedRows]); + const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? chatModes : chatModes?.filter((m) => !m.mode.includes('entity search+vector')); }, [isGdsActive, isCommunityAllowed]); - const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean(selectedRows.length && !(m.mode === 'vector' || m.mode === 'graph+vector')); + const handleModeChange = () => { + if (isDisabled) { + setchatMode('graph+vector'); + } else { + setchatMode(m.mode); + } + closeHandler(); + }; return { title: (
    @@ -42,10 +58,7 @@ export default function ChatModeToggle({
    ), - onClick: () => { - setchatMode(m.mode); - closeHandler(); - }, + onClick: handleModeChange, disabledCondition: isDisabled, description: ( @@ -64,6 +77,13 @@ export default function ChatModeToggle({ }; }); }, [chatMode, memoizedChatModes, setchatMode, closeHandler, selectedRows]); + + useEffect(() => { + if (!selectedRows.length && !chatMode) { + setchatMode(chatMode); + } + }, [setchatMode, selectedRows, chatMode]); + return ( Date: Thu, 19 Sep 2024 12:20:47 +0000 Subject: [PATCH 109/292] added local chat history --- backend/src/QA_integration.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index d28459a9b..eb6ac5be1 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -19,6 +19,7 @@ from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage from langchain.chains import GraphCypherQAChain +from langchain_community.chat_message_histories import ChatMessageHistory # LangChain chat models from langchain_openai import ChatOpenAI, AzureChatOpenAI @@ -37,7 +38,29 @@ load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') -EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) +EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) + + + +class SessionChatHistory: + history_dict = {} + + @classmethod + def get_chat_history(cls, session_id): + """Retrieve or create chat message history for a given session ID.""" + if session_id not in cls.history_dict: + logging.info(f"Creating new ChatMessageHistory Local for session ID: {session_id}") + cls.history_dict[session_id] = ChatMessageHistory() + else: + logging.info(f"Retrieved existing ChatMessageHistory Local for session ID: {session_id}") + return cls.history_dict[session_id] + +def get_history_by_session_id(session_id): + try: + return SessionChatHistory.get_chat_history(session_id) + except 
Exception as e: + logging.error(f"Failed to get history for session ID '{session_id}': {e}") + raise def get_total_tokens(ai_response, llm): try: @@ -68,12 +91,15 @@ def get_total_tokens(ai_response, llm): return total_tokens -def clear_chat_history(graph, session_id): +def clear_chat_history(graph, session_id,local=False): try: - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) + if not local: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + else: + history = get_history_by_session_id(session_id) history.clear() @@ -534,17 +560,20 @@ def process_graph_response(model, graph, question, messages, history): "user": "chatbot" } -def create_neo4j_chat_message_history(graph, session_id): +def create_neo4j_chat_message_history(graph, session_id, local=False): """ Creates and returns a Neo4jChatMessageHistory instance. """ try: - - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) + if not local: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + return history + + history = get_history_by_session_id(session_id) return history except Exception as e: From 9210bf8a226ced845542ce6cb8efda052a12c682 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:09:35 +0000 Subject: [PATCH 110/292] added neo4j from existing index to entity vector mode --- backend/src/QA_integration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index d28459a9b..7ac99f66b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -287,13 +287,14 @@ def initialize_neo4j_vector(graph, chat_mode_settings): keyword_index_name=keyword_index ) logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") + elif mode == "entity 
search+vector": + neo_db = Neo4jVector.from_existing_index( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph + ) else: - # neo_db = Neo4jVector.from_existing_index( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph - # ) neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, @@ -303,7 +304,6 @@ def initialize_neo4j_vector(graph, chat_mode_settings): embedding_node_property="embedding", text_node_properties=["text"] ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") except Exception as e: index_name = chat_mode_settings.get("index_name") From b5b8545edf2d635c2ff1ca1f60862954be2b5bf8 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:21:07 +0000 Subject: [PATCH 111/292] label changes --- backend/src/chunkid_entities.py | 6 ++++ backend/src/graph_query.py | 2 ++ .../src/components/ChatBot/ChatInfoModal.tsx | 29 ++++++++-------- .../src/components/ChatBot/ChatModeToggle.tsx | 16 ++++----- frontend/src/components/ChatBot/Chatbot.tsx | 4 +-- frontend/src/context/UsersFiles.tsx | 4 +-- frontend/src/utils/Constants.ts | 34 ++++++++++++------- frontend/src/utils/Utils.ts | 13 +++---- 8 files changed, 64 insertions(+), 44 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 3fd3c3a71..aa8d1d4ca 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -22,6 +22,8 @@ def process_records(records): if "labels" in start_node.keys(): labels = set(start_node["labels"]) labels.discard("__Entity__") + if not labels: + labels.add('*') start_node["labels"] = list(labels) nodes.append(start_node) seen_nodes.add(start_node['element_id']) @@ -30,6 +32,8 @@ def process_records(records): if "labels" in end_node.keys(): labels = set(end_node["labels"]) 
labels.discard("__Entity__") + if not labels: + labels.add('*') end_node["labels"] = list(labels) nodes.append(end_node) seen_nodes.add(end_node['element_id']) @@ -106,6 +110,8 @@ def remove_duplicate_nodes(nodes,property="element_id"): if "labels" in node.keys(): labels = set(node["labels"]) labels.discard("__Entity__") + if not labels: + labels.add('*') node["labels"] = list(labels) unique_nodes.append(node) seen_element_ids.add(element_id) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 77f6e9d0a..fb7333b48 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -63,6 +63,8 @@ def process_node(node): try: labels = set(node.labels) labels.discard("__Entity__") + if not labels: + labels.add('*') node_element = { "element_id": node.element_id, diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 82fa25e26..53172256a 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -31,6 +31,7 @@ import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './Communities'; +import { chatModeLables } from '../../utils/Constants'; const ChatInfoModal: React.FC = ({ sources, @@ -45,7 +46,7 @@ const ChatInfoModal: React.FC = ({ }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === 'graph' ? 4 : 3); + const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === chatModeLables.graph ? 
4 : 3); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); @@ -81,7 +82,7 @@ const ChatInfoModal: React.FC = ({ ); useEffect(() => { - if (mode != 'graph' || error?.trim() !== '') { + if (mode != chatModeLables.graph || error?.trim() !== '') { (async () => { setLoading(true); try { @@ -89,7 +90,7 @@ const ChatInfoModal: React.FC = ({ userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(','), userCredentials?.database, - mode === 'entity search+vector' + mode === chatModeLables.entity_vector ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -101,7 +102,7 @@ const ChatInfoModal: React.FC = ({ setInfoEntities( nodesData.map((n: Entity) => { - if (!n.labels.length && mode === 'entity search+vector') { + if (!n.labels.length && mode === chatModeLables.entity_vector) { return { ...n, labels: ['Entity'], @@ -112,7 +113,7 @@ const ChatInfoModal: React.FC = ({ ); setNodes( nodesData.map((n: ExtendedNode) => { - if (!n.labels.length && mode === 'entity search+vector') { + if (!n.labels.length && mode === chatModeLables.entity_vector) { return { ...n, labels: ['Entity'], @@ -174,22 +175,22 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode != 'graph' ? Sources used : <>} - {mode != 'graph' ? Chunks : <>} - {mode === 'graph+vector' || - mode === 'graph' || - mode === 'graph+vector+fulltext' || - mode === 'entity search+vector' ? ( + {mode != chatModeLables.graph ? Sources used : <>} + {mode != chatModeLables.graph ? Chunks : <>} + {mode === chatModeLables.graph_vector || + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> )} - {mode === 'graph' && cypher_query?.trim()?.length ? ( + {mode === chatModeLables.graph && cypher_query?.trim()?.length ? 
( Generated Cypher Query ) : ( <> )} - {mode === 'entity search+vector' ? Communities : <>} + {mode === chatModeLables.entity_vector? Communities : <>} )} @@ -216,7 +217,7 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> - {mode === 'entity search+vector' ? ( + {mode === chatModeLables.entity_vector ? ( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 944157f1e..6c9bf459f 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -2,7 +2,7 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; -import { chatModes } from '../../utils/Constants'; +import { chatModeLables, chatModes } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; @@ -25,23 +25,23 @@ export default function ChatModeToggle({ useEffect(() => { if (selectedRows.length !== 0) { - setchatMode('graph+vector'); + setchatMode(chatModeLables.graph_vector); } else { - setchatMode('graph+vector+fulltext'); + setchatMode(chatModeLables.graph_vector_fulltext); } }, [selectedRows]); const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? 
chatModes - : chatModes?.filter((m) => !m.mode.includes('entity search+vector')); + : chatModes?.filter((m) => !m.mode.includes(chatModeLables.entity_vector)); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { - const isDisabled = Boolean(selectedRows.length && !(m.mode === 'vector' || m.mode === 'graph+vector')); + const isDisabled = Boolean(selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector)); const handleModeChange = () => { if (isDisabled) { - setchatMode('graph+vector'); + setchatMode(chatModeLables.graph_vector); } else { setchatMode(m.mode); } @@ -64,12 +64,12 @@ export default function ChatModeToggle({ {chatMode === m.mode && ( <> - Selected + {chatModeLables.selected} )} {isDisabled && ( <> - Chatmode not available + {chatModeLables.unavailableChatMode} )} diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 5e931ad35..412f485ed 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -15,7 +15,7 @@ import { useFileContext } from '../../context/UsersFiles'; import clsx from 'clsx'; import ReactMarkdown from 'react-markdown'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; -import { buttonCaptions, tooltips } from '../../utils/Constants'; +import { buttonCaptions, chatModeLables, tooltips } from '../../utils/Constants'; import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; import FallBackDialog from '../UI/FallBackDialog'; @@ -44,7 +44,7 @@ const Chatbot: FC = (props) => { const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); const [copyMessageId, setCopyMessageId] = useState(null); - const [chatsMode, setChatsMode] = useState('graph+vector'); + const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector); 
const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index df805504e..fbab1b719 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -1,6 +1,6 @@ import { createContext, useContext, useState, Dispatch, SetStateAction, FC, useEffect } from 'react'; import { CustomFile, FileContextProviderProps, OptionType } from '../types'; -import { defaultLLM } from '../utils/Constants'; +import { chatModeLables, defaultLLM } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; interface showTextFromSchemaDialogType { @@ -58,7 +58,7 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); - const [chatMode, setchatMode] = useState('graph+vector+fulltext'); + const [chatMode, setchatMode] = useState(chatModeLables.graph_vector_fulltext); const [isSchema, setIsSchema] = useState(false); const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({ triggeredFrom: '', diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index e5cb806c8..0b2f60cba 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -65,6 +65,16 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') // ? 
process.env.VITE_CHAT_MODES?.split(',') // : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; +export const chatModeLables = { + vector: 'vector', + graph : 'graph', + graph_vector: 'graph+vector', + fulltext: 'fulltext', + graph_vector_fulltext: 'graph+vector+fulltext', + entity_vector:'entity search+vector', + unavailableChatMode: 'Chat mode is unavailable when rows are selected', + selected:'Selected' +} export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ @@ -73,33 +83,33 @@ export const chatModes = })) : [ { - mode: 'vector', - description: 'Utilizes vector indexing on text chunks to enable semantic similarity search.', + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', }, { - mode: 'graph', + mode: chatModeLables.graph, description: - 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.', + 'Translates text to Cypher queries for precise data retrieval from a graph database.' 
}, { - mode: 'graph+vector', + mode: chatModeLables.graph_vector, description: - 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.', + 'Combines vector indexing and graph connections for contextually enhanced semantic search.', }, { - mode: 'fulltext', + mode: chatModeLables.fulltext, description: - 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.', + 'Conducts fast, keyword-based search using full-text indexing on text chunks.', }, { - mode: 'graph+vector+fulltext', + mode: chatModeLables.graph_vector_fulltext, description: - 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.', + 'Integrates vector, graph, and full-text indexing for comprehensive search results.', }, { - mode: 'entity search+vector', + mode: chatModeLables.entity_vector, description: - 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.', + 'Uses vector indexing on entity nodes for highly relevant entity-based search.', }, ]; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 50ce37c3e..97e7bda07 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -19,6 +19,7 @@ import youtubedarklogo from '../assets/images/youtube-darkmode.svg'; import youtubelightlogo from '../assets/images/youtube-lightmode.svg'; import s3logo from '../assets/images/s3logo.png'; import gcslogo from '../assets/images/gcs.webp'; +import { chatModeLables } from './Constants'; // Get the Url export const url = () => { @@ -415,17 +416,17 @@ export const capitalizeWithPlus = (s: string) => { export const getDescriptionForChatMode = (mode: string): 
string => { switch (mode.toLowerCase()) { - case 'vector': + case chatModeLables.vector: return 'Utilizes vector indexing on text chunks to enable semantic similarity search.'; - case 'graph': + case chatModeLables.graph: return 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.'; - case 'graph+vector': + case chatModeLables.graph_vector: return 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.'; - case 'fulltext': + case chatModeLables.fulltext: return 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.'; - case 'graph+vector+fulltext': + case chatModeLables.graph_vector_fulltext: return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.'; - case 'entity search+vector': + case chatModeLables.entity_vector: return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.'; default: return 'Chat mode description not available'; // Fallback description From adf41067ad5883aff927498cf9857ed211be27f4 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:54:25 +0000 Subject: [PATCH 112/292] commented security header --- backend/score.py | 8 ++++---- backend/src/main.py | 30 ++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/backend/score.py b/backend/score.py index 74d22759f..891444e62 100644 --- a/backend/score.py +++ b/backend/score.py @@ -48,10 +48,10 @@ def sick(): app = FastAPI() SecWeb(app=app, Option={'referrer': False, 'xframe': 
False}) -app.add_middleware(HSTS, Option={'max-age': 4}) -app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': [], 'clearSiteData': {'cache': False, 'storage': False}, 'cacheControl': {'public': False, 's-maxage': 0}}, script_nonce=False, style_nonce=False, report_only=False) -app.add_middleware(XContentTypeOptions) -app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) +# app.add_middleware(HSTS, Option={'max-age': 4}) +# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +# app.add_middleware(XContentTypeOptions) +# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware( CORSMiddleware, diff --git a/backend/src/main.py b/backend/src/main.py index eebdff70f..7bf7c17bd 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -144,23 +144,33 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): logging.info(f"match value: {match}") cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") print(f'Credential file path {cred_path}') + if os.path.exists(cred_path): + logging.info("File path exist") + with open(cred_path,'r') as secret_file: + secret_data = json.load(secret_file) + logging.info(f"Project id : {secret_data['project_id']}") + logging.info(f"Universal domain: {secret_data['universe_domain']}") + else: + logging.warning("credntial file path not exist") + video_id = parse_qs(urlparse(youtube_url).query).get('v') print(f'Video Id Youtube: {video_id}') - # google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) - # youtube_loader_channel = GoogleApiYoutubeLoader( - # google_api_client=google_api_client, - # video_ids=[video_id[0].strip()], add_video_info=True - # ) - # youtube_transcript = youtube_loader_channel.load() - + google_api_client = 
GoogleApiClient(service_account_path=Path(cred_path)) + youtube_loader_channel = GoogleApiYoutubeLoader( + google_api_client=google_api_client, + video_ids=[video_id[0].strip()], add_video_info=True + ) + youtube_transcript = youtube_loader_channel.load() + page_content = youtube_transcript[0].page_content + obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - transcript= get_youtube_combined_transcript(match.group(1)) + # transcript= get_youtube_combined_transcript(match.group(1)) # print(transcript) - if transcript==None or len(transcript)==0: + if page_content==None or len(page_content)==0: message = f"Youtube transcript is not available for : {obj_source_node.file_name}" raise Exception(message) else: - obj_source_node.file_size = sys.getsizeof(transcript) + obj_source_node.file_size = sys.getsizeof(page_content) graphDb_data_Access = graphDBdataAccess(graph) graphDb_data_Access.create_source_node(obj_source_node) From 060fd2d15c7111dc1281fb90db94fd9904b154ec Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 20 Sep 2024 11:39:19 +0530 Subject: [PATCH 113/292] Communities (#721) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added communities creation * added communities * removed tqdm * removed __Entity__ labels * removed graph_object * removed graph object in the function * Modified queries * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval 
query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update 
test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * 
update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> 
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Fixed issue: processed chunks shown as 0 when a file is re-processed * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani… * modified the summary creation * fixed the summary creation * refactored qa integration * modified the chat mode settings * renamed QA_integration * added graphdatascience * moved settings to constants * modified constants * Science Molecule & database icon addition (#722) * DataScience icon addition * added gds status to connect call * icon stroke changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Add communities check and show respective chat modes (#729) * DataScience icon addition * added checkbox to create_communities * added conditionall check for community chat modes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * added local search * 725 add checkbox for create communities (#728) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditionall check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * modified local search query * added entity details for chat * modified chunkids * modified chunk_entities * Add communities Checkbox to graph viz (#739) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditionall check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query * 
checkbox addition * filter logic * filter logic missing checks * updated_graph_query * filter logic optimised * gds active check * handle checkbox show --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * changed name * modified the query * updated entities param * added document to details * added global embedding * added database * added database * modified is_entity * modifies chunk entities * removed QA integration * created neo4j from existing index * Integrate local search to chat details (#746) * added the commuties tab * removed unused variables * removed scipy libarary * added the mode check * Integrated the communities tab * added the cjheck * enabled the top entities mode * tabs order rearange * added the loader to sources tab for entity search+vector * fixed the chat mode per prop --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed entity label * Added Description to chat mode menu (#743) * added tooltips to chat mode menu * addition of description to menu * 741-tooltips-to-selectOptions * menu changes * acommunities name change * close changes * name changes * format fixes * community check * Entity Empty Label fix and Icon * Update Utils.ts * removed __Entity__ labels * spell fix * fixed postprocessing method invoking issue for odd no files * lint fix * Added filesource and name in chunks * Graph communities (#748) * UI changes * modes enable disable * separated sources entities chunk communities * communities added into separate component * Update ChatInfoModal.tsx * added filename and source for chunksinfo * removed the console.log * mode disable changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * aria label addition * code improvements used URL class for host url check * host level check * encryption of 
localstorage values * 'mode-selection-changes' * added neo4j from existing index to entity vector mode * label changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa --- backend/requirements.txt | 5 +- backend/score.py | 24 +- backend/src/QA_integration.py | 797 ++-- backend/src/QA_integration_new.py | 424 -- backend/src/QA_optimization.py | 217 - backend/src/chunkid_entities.py | 137 +- backend/src/communities.py | 408 ++ backend/src/graphDB_dataAccess.py | 32 +- backend/src/graph_query.py | 19 +- backend/src/llm.py | 2 + backend/src/main.py | 1 - backend/src/make_relationships.py | 7 +- backend/src/post_processing.py | 4 +- backend/src/shared/common_fn.py | 4 +- backend/src/shared/constants.py | 239 +- experiments/graphrag.ipynb | 3519 +++++++++++++++++ frontend/src/App.css | 26 +- .../src/components/ChatBot/ChatInfoModal.tsx | 400 +- .../src/components/ChatBot/ChatModeToggle.tsx | 106 +- frontend/src/components/ChatBot/Chatbot.tsx | 16 +- frontend/src/components/ChatBot/ChunkInfo.tsx | 125 + .../src/components/ChatBot/Communities.tsx | 36 + .../src/components/ChatBot/EntitiesInfo.tsx | 91 + .../ChatBot/ExpandedChatButtonContainer.tsx | 2 +- .../src/components/ChatBot/Info/InfoModal.tsx | 361 -- .../src/components/ChatBot/SourcesInfo.tsx | 137 + 
frontend/src/components/Content.tsx | 46 +- .../components/DataSources/Local/DropZone.tsx | 2 +- .../Local/DropZoneForSmallLayouts.tsx | 3 +- frontend/src/components/FileTable.tsx | 2 +- .../components/Graph/CheckboxSelection.tsx | 16 +- .../src/components/Graph/GraphViewModal.tsx | 70 +- frontend/src/components/Graph/LegendsChip.tsx | 11 +- frontend/src/components/Layout/Header.tsx | 2 +- frontend/src/components/Layout/SideNav.tsx | 4 +- .../ConnectionModal/ConnectionModal.tsx | 11 +- .../PostProcessingCheckList/index.tsx | 56 +- .../Popups/Settings/SchemaFromText.tsx | 1 - frontend/src/components/UI/DatabaseIcon.tsx | 25 + .../src/components/UI/DatabaseStatusIcon.tsx | 26 + .../src/components/UI/IconButtonToolTip.tsx | 27 +- frontend/src/components/UI/Menu.tsx | 31 +- .../src/components/UI/ScienceMolecule.tsx | 40 + .../WebSources/GenericSourceButton.tsx | 2 +- frontend/src/context/UserCredentials.tsx | 6 + frontend/src/context/UsersFiles.tsx | 5 +- frontend/src/services/CancelAPI.ts | 2 +- frontend/src/services/ChunkEntitiesInfo.ts | 9 +- frontend/src/types.ts | 47 +- frontend/src/utils/Constants.ts | 79 +- frontend/src/utils/Utils.ts | 196 +- 51 files changed, 5956 insertions(+), 1902 deletions(-) delete mode 100644 backend/src/QA_integration_new.py delete mode 100644 backend/src/QA_optimization.py create mode 100644 backend/src/communities.py create mode 100644 experiments/graphrag.ipynb create mode 100644 frontend/src/components/ChatBot/ChunkInfo.tsx create mode 100644 frontend/src/components/ChatBot/Communities.tsx create mode 100644 frontend/src/components/ChatBot/EntitiesInfo.tsx delete mode 100644 frontend/src/components/ChatBot/Info/InfoModal.tsx create mode 100644 frontend/src/components/ChatBot/SourcesInfo.tsx create mode 100644 frontend/src/components/UI/DatabaseIcon.tsx create mode 100644 frontend/src/components/UI/DatabaseStatusIcon.tsx create mode 100644 frontend/src/components/UI/ScienceMolecule.tsx diff --git a/backend/requirements.txt 
b/backend/requirements.txt index 7c67f13fa..158458ce1 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -140,7 +140,6 @@ requests==2.32.3 rsa==4.9 s3transfer==0.10.1 safetensors==0.4.1 -scipy==1.10.1 shapely==2.0.3 six==1.16.0 sniffio==1.3.1 @@ -179,4 +178,6 @@ sentence-transformers==3.0.1 google-cloud-logging==3.10.0 PyMuPDF==1.24.5 pypandoc==1.13 -Secweb==1.11.0 \ No newline at end of file +graphdatascience==1.10 +Secweb==1.11.0 + diff --git a/backend/score.py b/backend/score.py index 891444e62..e87fc618e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -2,7 +2,7 @@ from fastapi_health import health from fastapi.middleware.cors import CORSMiddleware from src.main import * -from src.QA_integration_new import * +from src.QA_integration import * from src.entities.user_credential import user_credential from src.shared.common_fn import * import uvicorn @@ -16,6 +16,7 @@ from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_fulltext, create_entity_embedding from sse_starlette.sse import EventSourceResponse +from src.communities import create_communities import json from typing import List, Mapping from starlette.middleware.sessions import SessionMiddleware @@ -47,11 +48,6 @@ def sick(): return False app = FastAPI() -SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -# app.add_middleware(HSTS, Option={'max-age': 4}) -# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) -# app.add_middleware(XContentTypeOptions) -# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware( CORSMiddleware, @@ -268,8 +264,12 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 
'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') - - logger.log_struct(json_obj, "INFO") + if "create_communities" in tasks: + model = "openai-gpt-4o" + await asyncio.to_thread(create_communities, uri, userName, password, database,model) + josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(josn_obj) + logging.info(f'created communities') return create_api_response('Success', message='All tasks completed successfully') except Exception as e: @@ -311,10 +311,10 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids=Form(None)): +async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()): try: logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) + result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower())) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) @@ -330,6 +330,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids= @app.post("/graph_query") async def graph_query( uri: str = Form(), + database: str = Form(), userName: str = Form(), password: str = Form(), document_names: str = Form(None), @@ -341,6 +342,7 @@ async def graph_query( uri=uri, username=userName, password=password, + database=database, document_names=document_names ) json_obj = 
{'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} @@ -451,7 +453,7 @@ async def generate(): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - print(f'Result of document status in SSE : {result}') + # print(f'Result of document status in SSE : {result}') if len(result) > 0: status = json.dumps({'fileName':file_name, 'status':result[0]['Status'], diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 6a70c3ec0..7ac99f66b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -1,342 +1,585 @@ -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.chains import GraphCypherQAChain -from langchain.graphs import Neo4jGraph import os +import json +from datetime import datetime +import time +from typing import Any from dotenv import load_dotenv -from langchain.chains import RetrievalQA -from langchain.chains import RetrievalQAWithSourcesChain -from langchain_openai import ChatOpenAI -from langchain_openai import OpenAIEmbeddings -from langchain_google_vertexai import VertexAIEmbeddings -from langchain_google_vertexai import ChatVertexAI -from langchain_google_vertexai import HarmBlockThreshold, HarmCategory import logging + +# LangChain imports +from langchain_community.vectorstores.neo4j_vector import Neo4jVector from langchain_community.chat_message_histories import Neo4jChatMessageHistory -from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings -from src.shared.common_fn import load_embedding_model -import re -from typing import Any -from datetime import datetime -import time +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder +from langchain_core.output_parsers import StrOutputParser +from 
langchain_core.runnables import RunnableBranch +from langchain.retrievers import ContextualCompressionRetriever +from langchain_community.document_transformers import EmbeddingsRedundantFilter +from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline +from langchain_text_splitters import TokenTextSplitter +from langchain_core.messages import HumanMessage, AIMessage +from langchain.chains import GraphCypherQAChain -load_dotenv() +# LangChain chat models +from langchain_openai import ChatOpenAI, AzureChatOpenAI +from langchain_google_vertexai import ChatVertexAI +from langchain_groq import ChatGroq +from langchain_anthropic import ChatAnthropic +from langchain_fireworks import ChatFireworks +from langchain_aws import ChatBedrock +from langchain_community.chat_models import ChatOllama + +# Local imports +from src.llm import get_llm +from src.shared.common_fn import load_embedding_model +from src.shared.constants import * -openai_api_key = os.environ.get('OPENAI_API_KEY') +load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) -CHAT_MAX_TOKENS = 1000 - - -# RETRIEVAL_QUERY = """ -# WITH node, score, apoc.text.join([ (node)-[:__HAS_ENTITY__]->(e) | head(labels(e)) + ": "+ e.id],", ") as entities -# MATCH (node)-[:__PART_OF__]->(d:Document) -# WITH d, apoc.text.join(collect(node.text + "\n" + entities),"\n----\n") as text, avg(score) as score -# RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata -# """ - -RETRIEVAL_QUERY = """ -WITH node as chunk, score -MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -CALL { WITH chunk -MATCH (chunk)-[:__HAS_ENTITY__]->(e) -MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,3}(:!__Chunk__&!__Document__) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, avg(score) as score, 
apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -WITH d, score, -[c in chunks | c.text] as texts, -[r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities -WITH d, score, -apoc.text.join(texts,"\n----\n") + -apoc.text.join(entities,"\n") -as text, entities -RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), entities:entities} as metadata -""" - -FINAL_PROMPT = """ -You are an AI-powered question-answering agent tasked with providing accurate and direct responses to user queries. Utilize information from the chat history, current user input, and Relevant Information effectively. - -Response Requirements: -- Deliver concise and direct answers to the user's query without headers unless requested. -- Acknowledge and utilize relevant previous interactions based on the chat history summary. -- Respond to initial greetings appropriately, but avoid including a greeting in subsequent responses unless the chat is restarted or significantly paused. -- For non-general questions, strive to provide answers using chat history and Relevant Information ONLY do not Hallucinate. -- Clearly state if an answer is unknown; avoid speculating. - -Instructions: -- Prioritize directly answering the User Input: {question}. -- Use the Chat History Summary: {chat_summary} to provide context-aware responses. -- Refer to Relevant Information: {vector_result} only if it directly relates to the query. -- Cite sources clearly when using Relevant Information in your response [Sources: {sources}] without fail. The Source must be printed only at the last in the format [Source: source1,source2] . Duplicate sources should be removed. -Ensure that answers are straightforward and context-aware, focusing on being relevant and concise. 
-""" - -def get_llm(model: str,max_tokens=1000) -> Any: - """Retrieve the specified language model based on the model name.""" - - model_versions = { - "openai-gpt-3.5": "gpt-3.5-turbo-16k", - "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-preview-0409", - "openai-gpt-4": "gpt-4-0125-preview", - "diffbot" : "gpt-4-0125-preview", - "openai-gpt-4o":"gpt-4o", - "openai-gpt-4o-mini": "gpt-4o-mini", - } - if model in model_versions: - model_version = model_versions[model] - logging.info(f"Chat Model: {model}, Model Version: {model_version}") + +def get_total_tokens(ai_response, llm): + try: + if isinstance(llm, (ChatOpenAI, AzureChatOpenAI, ChatFireworks, ChatGroq)): + total_tokens = ai_response.response_metadata.get('token_usage', {}).get('total_tokens', 0) + + elif isinstance(llm, ChatVertexAI): + total_tokens = ai_response.response_metadata.get('usage_metadata', {}).get('prompt_token_count', 0) + + elif isinstance(llm, ChatBedrock): + total_tokens = ai_response.response_metadata.get('usage', {}).get('total_tokens', 0) + + elif isinstance(llm, ChatAnthropic): + input_tokens = int(ai_response.response_metadata.get('usage', {}).get('input_tokens', 0)) + output_tokens = int(ai_response.response_metadata.get('usage', {}).get('output_tokens', 0)) + total_tokens = input_tokens + output_tokens + + elif isinstance(llm, ChatOllama): + total_tokens = ai_response.response_metadata.get("prompt_eval_count", 0) - if "Gemini" in model: - llm = ChatVertexAI( - model_name=model_version, - convert_system_message_to_human=True, - max_tokens=max_tokens, - temperature=0, - safety_settings={ - HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE - 
} - ) else: - llm = ChatOpenAI(model=model_version, temperature=0,max_tokens=max_tokens) + logging.warning(f"Unrecognized language model: {type(llm)}. Returning 0 tokens.") + total_tokens = 0 - return llm,model_version + except Exception as e: + logging.error(f"Error retrieving total tokens: {e}") + total_tokens = 0 - else: - logging.error(f"Unsupported model: {model}") - return None,None + return total_tokens + +def clear_chat_history(graph, session_id): + try: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + + history.clear() + + return { + "session_id": session_id, + "message": "The chat history has been cleared.", + "user": "chatbot" + } + + except Exception as e: + logging.error(f"Error clearing chat history for session {session_id}: {e}") + return { + "session_id": session_id, + "message": "Failed to clear chat history.", + "user": "chatbot" + } + +def get_sources_and_chunks(sources_used, docs): + chunkdetails_list = [] + sources_used_set = set(sources_used) + seen_ids_and_scores = set() + + for doc in docs: + try: + source = doc.metadata.get("source") + chunkdetails = doc.metadata.get("chunkdetails", []) + + if source in sources_used_set: + for chunkdetail in chunkdetails: + id = chunkdetail.get("id") + score = round(chunkdetail.get("score", 0), 4) -def vector_embed_results(qa,question): - vector_res={} + id_and_score = (id, score) + + if id_and_score not in seen_ids_and_scores: + seen_ids_and_scores.add(id_and_score) + chunkdetails_list.append({**chunkdetail, "score": score}) + + except Exception as e: + logging.error(f"Error processing document: {e}") + + result = { + 'sources': sources_used, + 'chunkdetails': chunkdetails_list, + "entities" : list() + } + return result + +def get_rag_chain(llm, system_template=CHAT_SYSTEM_TEMPLATE): try: - result = qa({"query": question}) - vector_res['result']=result.get("result") - - sources = set() - entities = set() - for document in result["source_documents"]: - 
sources.add(document.metadata["source"]) - for entiti in document.metadata["entities"]: - entities.add(entiti) - vector_res['source']=list(sources) - vector_res['entities'] = list(entities) - if len( vector_res['entities']) > 5: - vector_res['entities'] = vector_res['entities'][:5] - - # list_source_docs=[] - # for i in result["source_documents"]: - # list_source_docs.append(i.metadata['source']) - # vector_res['source']=list_source_docs - - # result = qa({"question":question},return_only_outputs=True) - # vector_res['result'] = result.get("answer") - # vector_res["source"] = result.get("sources") + question_answering_prompt = ChatPromptTemplate.from_messages( + [ + ("system", system_template), + MessagesPlaceholder(variable_name="messages"), + ( + "human", + "User question: {input}" + ), + ] + ) + + question_answering_chain = question_answering_prompt | llm + + return question_answering_chain + except Exception as e: - error_message = str(e) - logging.exception(f'Exception in vector embedding in QA component:{error_message}') - # raise Exception(error_message) + logging.error(f"Error creating RAG chain: {e}") + raise + +def format_documents(documents, model): + prompt_token_cutoff = 4 + for model_names, value in CHAT_TOKEN_CUT_OFF.items(): + if model in model_names: + prompt_token_cutoff = value + break + + sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True) + sorted_documents = sorted_documents[:prompt_token_cutoff] + + formatted_docs = [] + sources = set() + lc_entities = {'entities':list()} + + for doc in sorted_documents: + try: + source = doc.metadata.get('source', "unknown") + sources.add(source) + + lc_entities = doc.metadata if 'entities'in doc.metadata.keys() else lc_entities + + formatted_doc = ( + "Document start\n" + f"This Document belongs to the source {source}\n" + f"Content: {doc.page_content}\n" + "Document end\n" + ) + formatted_docs.append(formatted_doc) + + except Exception as e: + 
logging.error(f"Error formatting document: {e}") - return vector_res + return "\n\n".join(formatted_docs), sources,lc_entities + +def process_documents(docs, question, messages, llm, model,chat_mode_settings): + start_time = time.time() -def save_chat_history(history,user_message,ai_message): try: - # history = Neo4jChatMessageHistory( - # graph=graph, - # session_id=session_id - # ) - history.add_user_message(user_message) - history.add_ai_message(ai_message) - logging.info(f'Successfully saved chat history') + formatted_docs, sources,lc_entities = format_documents(docs, model) + + rag_chain = get_rag_chain(llm=llm) + + ai_response = rag_chain.invoke({ + "messages": messages[:-1], + "context": formatted_docs, + "input": question + }) + if chat_mode_settings["mode"] == "entity search+vector": + result = {'sources': list(), + 'chunkdetails': list()} + result.update(lc_entities) + else: + result = get_sources_and_chunks(sources, docs) + content = ai_response.content + total_tokens = get_total_tokens(ai_response, llm) + + predict_time = time.time() - start_time + logging.info(f"Final response predicted in {predict_time:.2f} seconds") + except Exception as e: - error_message = str(e) - logging.exception(f'Exception in saving chat history:{error_message}') + logging.error(f"Error processing documents: {e}") + raise -def get_chat_history(llm, history): - """Retrieves and summarizes the chat history for a given session.""" + return content, result, total_tokens + +def retrieve_documents(doc_retriever, messages): + start_time = time.time() try: - # history = Neo4jChatMessageHistory( - # graph=graph, - # session_id=session_id - # ) - chat_history = history.messages + docs = doc_retriever.invoke({"messages": messages}) + doc_retrieval_time = time.time() - start_time + logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") - if not chat_history: - return "" + except Exception as e: + logging.error(f"Error retrieving documents: {e}") + raise + + return docs - 
if len(chat_history) > 4: - chat_history = chat_history[-4:] +def create_document_retriever_chain(llm, retriever): + try: + logging.info("Starting to create document retriever chain") - condense_template = f""" - Given the following earlier conversation, summarize the chat history. - Make sure to include all relevant information. - Chat History: {chat_history} - """ - chat_summary = llm.predict(condense_template) - return chat_summary + query_transform_prompt = ChatPromptTemplate.from_messages( + [ + ("system", QUESTION_TRANSFORM_TEMPLATE), + MessagesPlaceholder(variable_name="messages") + ] + ) - except Exception as e: - logging.exception(f"Exception in retrieving chat history: {e}") - return "" + output_parser = StrOutputParser() -def clear_chat_history(graph, session_id): + splitter = TokenTextSplitter(chunk_size=CHAT_DOC_SPLIT_SIZE, chunk_overlap=0) + embeddings_filter = EmbeddingsFilter( + embeddings=EMBEDDING_FUNCTION, + similarity_threshold=CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD + ) + + pipeline_compressor = DocumentCompressorPipeline( + transformers=[splitter, embeddings_filter] + ) + + compression_retriever = ContextualCompressionRetriever( + base_compressor=pipeline_compressor, base_retriever=retriever + ) + + query_transforming_retriever_chain = RunnableBranch( + ( + lambda x: len(x.get("messages", [])) == 1, + (lambda x: x["messages"][-1].content) | compression_retriever, + ), + query_transform_prompt | llm | output_parser | compression_retriever, + ).with_config(run_name="chat_retriever_chain") + logging.info("Successfully created document retriever chain") + return query_transforming_retriever_chain + + except Exception as e: + logging.error(f"Error creating document retriever chain: {e}", exc_info=True) + raise + +def initialize_neo4j_vector(graph, chat_mode_settings): try: - logging.info(f"Clearing chat history for session ID: {session_id}") - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id + mode = 
chat_mode_settings.get('mode', 'undefined') + retrieval_query = chat_mode_settings.get("retrieval_query") + index_name = chat_mode_settings.get("index_name") + keyword_index = chat_mode_settings.get("keyword_index", "") + + if not retrieval_query or not index_name: + raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.") + + if keyword_index: + neo_db = Neo4jVector.from_existing_graph( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph, + search_type="hybrid", + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"], + keyword_index_name=keyword_index + ) + logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") + elif mode == "entity search+vector": + neo_db = Neo4jVector.from_existing_index( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph + ) + else: + neo_db = Neo4jVector.from_existing_graph( + embedding=EMBEDDING_FUNCTION, + index_name=index_name, + retrieval_query=retrieval_query, + graph=graph, + node_label="Chunk", + embedding_node_property="embedding", + text_node_properties=["text"] + ) + logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") + except Exception as e: + index_name = chat_mode_settings.get("index_name") + logging.error(f"Error retrieving Neo4jVector index {index_name} : {e}") + raise + return neo_db + +def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_threshold): + if document_names and chat_mode_settings["document_filter"]: + retriever = neo_db.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={ + 'k': search_k, + 'score_threshold': score_threshold, + 'filter': {'fileName': {'$in': document_names}} + } ) - history.clear() - logging.info("Chat history cleared successfully") + logging.info(f"Successfully created retriever with 
search_k={search_k}, score_threshold={score_threshold} for documents {document_names}") + else: + retriever = neo_db.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={'k': search_k, 'score_threshold': score_threshold} + ) + logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}") + return retriever +def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): + try: + + neo_db = initialize_neo4j_vector(graph, chat_mode_settings) + document_names= list(map(str.strip, json.loads(document_names))) + search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "entity search+vector" else CHAT_SEARCH_KWARG_K + retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) + return retriever + except Exception as e: + index_name = chat_mode_settings.get("index_name") + logging.error(f"Error retrieving Neo4jVector index {index_name} or creating retriever: {e}") + raise Exception(f"An error occurred while retrieving the Neo4jVector index or creating the retriever. 
Please drop and create a new vector index '{index_name}': {e}") from e + + +def setup_chat(model, graph, document_names, chat_mode_settings): + start_time = time.time() + try: + model = "openai-gpt-4o" if model == "diffbot" else model + + llm, model_name = get_llm(model=model) + logging.info(f"Model called in chat: {model} (version: {model_name})") + + retriever = get_neo4j_retriever(graph=graph, chat_mode_settings=chat_mode_settings, document_names=document_names) + doc_retriever = create_document_retriever_chain(llm, retriever) + + chat_setup_time = time.time() - start_time + logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") + + except Exception as e: + logging.error(f"Error during chat setup: {e}", exc_info=True) + raise + + return llm, doc_retriever, model_name + +def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings): + try: + llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) + + docs = retrieve_documents(doc_retriever, messages) + if docs: + content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings) + else: + content = "I couldn't find any relevant documents to answer your question." 
+ result = {"sources": [], "chunkdetails": [],"entities":[]} + total_tokens = 0 + + ai_response = AIMessage(content=content) + messages.append(ai_response) + summarize_and_log(history, messages, llm) + return { - "session_id": session_id, - "message": "The chat history is cleared", + "session_id": "", + "message": content, + "info": { + "sources": result["sources"], + "model": model_version, + "chunkdetails": result["chunkdetails"], + "total_tokens": total_tokens, + "response_time": 0, + "mode": chat_mode_settings["mode"], + "entities" : result["entities"] + }, "user": "chatbot" } + except Exception as e: - logging.exception(f"Error occurred while clearing chat history for session ID {session_id}: {e}") - - -def extract_and_remove_source(message): - pattern = r'\[Source: ([^\]]+)\]' - match = re.search(pattern, message) - if match: - sources_string = match.group(1) - sources = [source.strip().strip("'") for source in sources_string.split(',')] - new_message = re.sub(pattern, '', message).strip() - response = { - "message" : new_message, - "sources" : sources - } - else: - response = { - "message" : message, - "sources" : [] - } - return response - -def clear_chat_history(graph,session_id): - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - history.clear() - return { - "session_id": session_id, - "message": "The chat History is cleared", + logging.exception(f"Error processing chat response at {datetime.now()}: {str(e)}") + return { + "session_id": "", + "message": "Something went wrong", + "info": { + "sources": [], + "chunkdetails": [], + "total_tokens": 0, + "response_time": 0, + "error": f"{type(e).__name__}: {str(e)}", + "mode": chat_mode_settings["mode"], + "entities": [] + }, "user": "chatbot" - } - -def QA_RAG(graph,model,question,session_id): - logging.info(f"QA_RAG called at {datetime.now()}") - # model = "Gemini Pro" - try: - qa_rag_start_time = time.time() + } +def summarize_and_log(history, stored_messages, llm): + if not 
stored_messages: + logging.info("No messages to summarize.") + return False + try: start_time = time.time() - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name="vector", - retrieval_query=RETRIEVAL_QUERY, - graph=graph + + summarization_prompt = ChatPromptTemplate.from_messages( + [ + MessagesPlaceholder(variable_name="chat_history"), + ( + "human", + "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information." + ), + ] ) + summarization_chain = summarization_prompt | llm - history = Neo4jChatMessageHistory( + summary_message = summarization_chain.invoke({"chat_history": stored_messages}) + + history.clear() + history.add_user_message("Our current conversation summary till now") + history.add_message(summary_message) + + history_summarized_time = time.time() - start_time + logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") + + return True + + except Exception as e: + logging.error(f"An error occurred while summarizing messages: {e}") + return False + +def create_graph_chain(model, graph): + try: + logging.info(f"Graph QA Chain using LLM model: {model}") + + cypher_llm,model_name = get_llm(model) + qa_llm,model_name = get_llm(model) + graph_chain = GraphCypherQAChain.from_llm( + cypher_llm=cypher_llm, + qa_llm=qa_llm, + validate_cypher= True, graph=graph, - session_id=session_id + # verbose=True, + return_intermediate_steps = True, + top_k=3 ) - - llm,model_version = get_llm(model=model,max_tokens=CHAT_MAX_TOKENS) - qa = RetrievalQA.from_chain_type( - llm=llm, - chain_type="stuff", - retriever=neo_db.as_retriever(search_kwargs={'k': 3, "score_threshold": 0.7}), - return_source_documents=True - ) - # qa = RetrievalQAWithSourcesChain.from_chain_type( - # llm=llm, - # chain_type="stuff", - # retriever=neo_db.as_retriever(search_kwargs={'k': 3, 
"score_threshold": 0.7})) + logging.info("GraphCypherQAChain instance created successfully.") + return graph_chain,qa_llm,model_name - db_setup_time = time.time() - start_time - logging.info(f"DB Setup completed in {db_setup_time:.2f} seconds") - - start_time = time.time() - chat_summary = get_chat_history(llm,history) - chat_history_time = time.time() - start_time - logging.info(f"Chat history summarized in {chat_history_time:.2f} seconds") - # print(chat_summary) + except Exception as e: + logging.error(f"An error occurred while creating the GraphCypherQAChain instance. : {e}") - start_time = time.time() - vector_res = vector_embed_results(qa, question) - vector_time = time.time() - start_time - logging.info(f"Vector response obtained in {vector_time:.2f} seconds") - # print(vector_res) +def get_graph_response(graph_chain, question): + try: + cypher_res = graph_chain.invoke({"query": question}) - formatted_prompt = FINAL_PROMPT.format( - question=question, - chat_summary=chat_summary, - vector_result=vector_res.get('result', ''), - sources=vector_res.get('source', '') - ) - # print(formatted_prompt) - - start_time = time.time() - # llm = get_llm(model=model,embedding=False) - response = llm.predict(formatted_prompt) - predict_time = time.time() - start_time - logging.info(f"Response predicted in {predict_time:.2f} seconds") + response = cypher_res.get("result") + cypher_query = "" + context = [] + + for step in cypher_res.get("intermediate_steps", []): + if "query" in step: + cypher_string = step["query"] + cypher_query = cypher_string.replace("cypher\n", "").replace("\n", " ").strip() + elif "context" in step: + context = step["context"] + return { + "response": response, + "cypher_query": cypher_query, + "context": context + } + + except Exception as e: + logging.error(f"An error occurred while getting the graph response : {e}") - start_time = time.time() - ai_message = response - user_message = question - save_chat_history(history, user_message, ai_message) - 
chat_history_save = time.time() - start_time - logging.info(f"Chat History saved in {chat_history_save:.2f} seconds") +def process_graph_response(model, graph, question, messages, history): + try: + graph_chain, qa_llm, model_version = create_graph_chain(model, graph) - response_data = extract_and_remove_source(response) - message = response_data["message"] - sources = response_data["sources"] + graph_response = get_graph_response(graph_chain, question) - print(f"message : {message}") - print(f"sources : {sources}") - total_call_time = time.time() - qa_rag_start_time - logging.info(f"Total Response time is {total_call_time:.2f} seconds") - return { - "session_id": session_id, - "message": message, + ai_response_content = graph_response.get("response", "Something went wrong") + ai_response = AIMessage(content=ai_response_content) + + messages.append(ai_response) + summarize_and_log(history, messages, qa_llm) + + result = { + "session_id": "", + "message": ai_response_content, "info": { - "sources": sources, - "model":model_version, - "entities":vector_res["entities"] + "model": model_version, + "cypher_query": graph_response.get("cypher_query", ""), + "context": graph_response.get("context", ""), + "mode": "graph", + "response_time": 0 }, "user": "chatbot" - } - + } + + return result + except Exception as e: - logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") - error_name = type(e).__name__ + logging.exception(f"Error processing graph response at {datetime.now()}: {str(e)}") return { - "session_id": session_id, + "session_id": "", "message": "Something went wrong", "info": { - "sources": [], - "error": f"{error_name} :- {str(e)}" + "model": model_version, + "cypher_query": "", + "context": "", + "mode": "graph", + "response_time": 0, + "error": f"{type(e).__name__}: {str(e)}" }, - "user": "chatbot"} + "user": "chatbot" + } + +def create_neo4j_chat_message_history(graph, session_id): + """ + Creates and returns a Neo4jChatMessageHistory 
instance. + + """ + try: + + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + return history + + except Exception as e: + logging.error(f"Error creating Neo4jChatMessageHistory: {e}") + raise + +def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): + default_settings = settings_map["default"] + try: + chat_mode_settings = settings_map.get(mode, default_settings) + chat_mode_settings["mode"] = mode + + logging.info(f"Chat mode settings: {chat_mode_settings}") + except Exception as e: + logging.error(f"Unexpected error: {e}", exc_info=True) + raise + return chat_mode_settings + +def QA_RAG(graph, model, question, document_names, session_id, mode): + logging.info(f"Chat Mode: {mode}") + history = create_neo4j_chat_message_history(graph, session_id) + messages = history.messages + user_question = HumanMessage(content=question) + messages.append(user_question) + if mode == "graph": + result = process_graph_response(model, graph, question, messages, history) + else: + chat_mode_settings = get_chat_mode_settings(mode=mode) + result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) + + result["session_id"] = session_id + + return result \ No newline at end of file diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py deleted file mode 100644 index eeac78c1e..000000000 --- a/backend/src/QA_integration_new.py +++ /dev/null @@ -1,424 +0,0 @@ -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.graphs import Neo4jGraph -import os -from dotenv import load_dotenv -import logging -from langchain_community.chat_message_histories import Neo4jChatMessageHistory -from src.llm import get_llm -from src.shared.common_fn import load_embedding_model -import re -from typing import Any -from datetime import datetime -import time -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain_core.output_parsers 
import StrOutputParser -from langchain_core.runnables import RunnableBranch -from langchain.retrievers import ContextualCompressionRetriever -from langchain_community.document_transformers import EmbeddingsRedundantFilter -from langchain.retrievers.document_compressors import EmbeddingsFilter -from langchain.retrievers.document_compressors import DocumentCompressorPipeline -from langchain_text_splitters import TokenTextSplitter -from langchain_core.messages import HumanMessage,AIMessage -from src.shared.constants import * -from src.llm import get_llm -from langchain.chains import GraphCypherQAChain -import json - -## Chat models -from langchain_openai import ChatOpenAI, AzureChatOpenAI -from langchain_google_vertexai import ChatVertexAI -from langchain_groq import ChatGroq -from langchain_anthropic import ChatAnthropic -from langchain_fireworks import ChatFireworks -from langchain_aws import ChatBedrock -from langchain_community.chat_models import ChatOllama - -load_dotenv() - -EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') -EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) - - -def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): - try: - if mode == "fulltext" or mode == "graph + vector + fulltext": - neo_db = Neo4jVector.from_existing_graph( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph, - search_type="hybrid", - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"], - keyword_index_name=keyword_index - ) - # neo_db = Neo4jVector.from_existing_index( - # embedding=EMBEDDING_FUNCTION, - # index_name=index_name, - # retrieval_query=retrieval_query, - # graph=graph, - # search_type="hybrid", - # keyword_index_name=keyword_index - # ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword 
index '{keyword_index}'") - else: - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph - ) - logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") - document_names= list(map(str.strip, json.loads(document_names))) - if document_names: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': search_k, "score_threshold": score_threshold,'filter':{'fileName': {'$in': document_names}}}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}") - else: - retriever = neo_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k': search_k, "score_threshold": score_threshold}) - logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold}") - return retriever - except Exception as e: - logging.error(f"Error retrieving Neo4jVector index '{index_name}' or creating retriever: {e}") - raise Exception("An error occurred while retrieving the Neo4jVector index '{index_name}' or creating the retriever. 
Please drop and create a new vector index: {e}") from e - -def create_document_retriever_chain(llm, retriever): - try: - logging.info("Starting to create document retriever chain") - - query_transform_prompt = ChatPromptTemplate.from_messages( - [ - ("system", QUESTION_TRANSFORM_TEMPLATE), - MessagesPlaceholder(variable_name="messages") - ] - ) - - output_parser = StrOutputParser() - - splitter = TokenTextSplitter(chunk_size=CHAT_DOC_SPLIT_SIZE, chunk_overlap=0) - embeddings_filter = EmbeddingsFilter( - embeddings=EMBEDDING_FUNCTION, - similarity_threshold=CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD - ) - - pipeline_compressor = DocumentCompressorPipeline( - transformers=[splitter, embeddings_filter] - ) - - compression_retriever = ContextualCompressionRetriever( - base_compressor=pipeline_compressor, base_retriever=retriever - ) - - query_transforming_retriever_chain = RunnableBranch( - ( - lambda x: len(x.get("messages", [])) == 1, - (lambda x: x["messages"][-1].content) | compression_retriever, - ), - query_transform_prompt | llm | output_parser | compression_retriever, - ).with_config(run_name="chat_retriever_chain") - - logging.info("Successfully created document retriever chain") - return query_transforming_retriever_chain - - except Exception as e: - logging.error(f"Error creating document retriever chain: {e}", exc_info=True) - raise - - -def create_neo4j_chat_message_history(graph, session_id): - """ - Creates and returns a Neo4jChatMessageHistory instance. 
- - """ - try: - - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - return history - - except Exception as e: - logging.error(f"Error creating Neo4jChatMessageHistory: {e}") - raise - -def format_documents(documents,model): - prompt_token_cutoff = 4 - for models,value in CHAT_TOKEN_CUT_OFF.items(): - if model in models: - prompt_token_cutoff = value - - sorted_documents = sorted(documents, key=lambda doc: doc.state["query_similarity_score"], reverse=True) - sorted_documents = sorted_documents[:prompt_token_cutoff] - - formatted_docs = [] - sources = set() - - for doc in sorted_documents: - source = doc.metadata['source'] - sources.add(source) - - formatted_doc = ( - "Document start\n" - f"This Document belongs to the source {source}\n" - f"Content: {doc.page_content}\n" - "Document end\n" - ) - formatted_docs.append(formatted_doc) - - return "\n\n".join(formatted_docs), sources - -def get_rag_chain(llm,system_template=CHAT_SYSTEM_TEMPLATE): - question_answering_prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_template), - MessagesPlaceholder(variable_name="messages"), - ( - "human", - "User question: {input}" - ), - ] - ) - question_answering_chain = question_answering_prompt | llm - - return question_answering_chain - -def get_sources_and_chunks(sources_used, docs): - chunkdetails_list = [] - sources_used_set = set(sources_used) - - for doc in docs: - source = doc.metadata["source"] - chunkdetails = doc.metadata["chunkdetails"] - if source in sources_used_set: - chunkdetails = [{**chunkdetail, "score": round(chunkdetail["score"], 4)} for chunkdetail in chunkdetails] - chunkdetails_list.extend(chunkdetails) - - result = { - 'sources': sources_used, - 'chunkdetails': chunkdetails_list - } - return result - -def summarize_messages(llm,history,stored_messages): - if len(stored_messages) == 0: - return False - summarization_prompt = ChatPromptTemplate.from_messages( - [ - 
MessagesPlaceholder(variable_name="chat_history"), - ( - "human", - "Summarize the above chat messages into a concise message, focusing on key points and relevant details that could be useful for future conversations. Exclude all introductions and extraneous information." - ), - ] - ) - - summarization_chain = summarization_prompt | llm - - summary_message = summarization_chain.invoke({"chat_history": stored_messages}) - - history.clear() - history.add_user_message("Our current convertaion summary till now") - history.add_message(summary_message) - return True - - -def get_total_tokens(ai_response,llm): - - if isinstance(llm,(ChatOpenAI,AzureChatOpenAI,ChatFireworks,ChatGroq)): - total_tokens = ai_response.response_metadata['token_usage']['total_tokens'] - elif isinstance(llm,(ChatVertexAI)): - total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count'] - elif isinstance(llm,(ChatBedrock)): - total_tokens = ai_response.response_metadata['usage']['total_tokens'] - elif isinstance(llm,(ChatAnthropic)): - input_tokens = int(ai_response.response_metadata['usage']['input_tokens']) - output_tokens = int(ai_response.response_metadata['usage']['output_tokens']) - total_tokens = input_tokens + output_tokens - elif isinstance(llm,(ChatOllama)): - total_tokens = ai_response.response_metadata["prompt_eval_count"] - else: - total_tokens = 0 - return total_tokens - - -def clear_chat_history(graph,session_id): - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) - history.clear() - return { - "session_id": session_id, - "message": "The chat History is cleared", - "user": "chatbot" - } - -def setup_chat(model, graph, document_names,retrieval_query,mode): - start_time = time.time() - if model in ["diffbot"]: - model = "openai-gpt-4o" - llm,model_name = get_llm(model) - logging.info(f"Model called in chat {model} and model version is {model_name}") - retriever = 
get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query,document_names=document_names,mode=mode) - doc_retriever = create_document_retriever_chain(llm, retriever) - chat_setup_time = time.time() - start_time - logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") - - return llm, doc_retriever, model_name - -def retrieve_documents(doc_retriever, messages): - start_time = time.time() - docs = doc_retriever.invoke({"messages": messages}) - doc_retrieval_time = time.time() - start_time - logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") - return docs - -def process_documents(docs, question, messages, llm,model): - start_time = time.time() - formatted_docs, sources = format_documents(docs,model) - rag_chain = get_rag_chain(llm=llm) - ai_response = rag_chain.invoke({ - "messages": messages[:-1], - "context": formatted_docs, - "input": question - }) - result = get_sources_and_chunks(sources, docs) - content = ai_response.content - total_tokens = get_total_tokens(ai_response,llm) - - - predict_time = time.time() - start_time - logging.info(f"Final Response predicted in {predict_time:.2f} seconds") - - return content, result, total_tokens - -def summarize_and_log(history, messages, llm): - start_time = time.time() - summarize_messages(llm, history, messages) - history_summarized_time = time.time() - start_time - logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") - - -def create_graph_chain(model, graph): - try: - logging.info(f"Graph QA Chain using LLM model: {model}") - - cypher_llm,model_name = get_llm(model) - qa_llm,model_name = get_llm(model) - graph_chain = GraphCypherQAChain.from_llm( - cypher_llm=cypher_llm, - qa_llm=qa_llm, - validate_cypher= True, - graph=graph, - # verbose=True, - return_intermediate_steps = True, - top_k=3 - ) - - logging.info("GraphCypherQAChain instance created successfully.") - return graph_chain,qa_llm,model_name - - except Exception as e: - logging.error(f"An 
error occurred while creating the GraphCypherQAChain instance. : {e}") - - -def get_graph_response(graph_chain, question): - try: - cypher_res = graph_chain.invoke({"query": question}) - - response = cypher_res.get("result") - cypher_query = "" - context = [] - - for step in cypher_res.get("intermediate_steps", []): - if "query" in step: - cypher_string = step["query"] - cypher_query = cypher_string.replace("cypher\n", "").replace("\n", " ").strip() - elif "context" in step: - context = step["context"] - return { - "response": response, - "cypher_query": cypher_query, - "context": context - } - - except Exception as e: - logging.error("An error occurred while getting the graph response : {e}") - -def QA_RAG(graph, model, question, document_names,session_id, mode): - try: - logging.info(f"Chat Mode : {mode}") - history = create_neo4j_chat_message_history(graph, session_id) - messages = history.messages - user_question = HumanMessage(content=question) - messages.append(user_question) - - if mode == "graph": - graph_chain, qa_llm,model_version = create_graph_chain(model,graph) - graph_response = get_graph_response(graph_chain,question) - ai_response = AIMessage(content=graph_response["response"]) if graph_response["response"] else AIMessage(content="Something went wrong") - messages.append(ai_response) - summarize_and_log(history, messages, qa_llm) - - result = { - "session_id": session_id, - "message": graph_response["response"], - "info": { - "model": model_version, - 'cypher_query':graph_response["cypher_query"], - "context" : graph_response["context"], - "mode" : mode, - "response_time": 0 - }, - "user": "chatbot" - } - return result - elif mode == "vector" or mode == "fulltext": - retrieval_query = VECTOR_SEARCH_QUERY - else: - retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT) - - llm, doc_retriever, model_version = setup_chat(model, graph, document_names,retrieval_query,mode) - - docs = 
retrieve_documents(doc_retriever, messages) - - if docs: - content, result, total_tokens = process_documents(docs, question, messages, llm,model) - else: - content = "I couldn't find any relevant documents to answer your question." - result = {"sources": [], "chunkdetails": []} - total_tokens = 0 - - ai_response = AIMessage(content=content) - messages.append(ai_response) - summarize_and_log(history, messages, llm) - - return { - "session_id": session_id, - "message": content, - "info": { - "sources": result["sources"], - "model": model_version, - "chunkdetails": result["chunkdetails"], - "total_tokens": total_tokens, - "response_time": 0, - "mode": mode - }, - "user": "chatbot" - } - - except Exception as e: - logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") - error_name = type(e).__name__ - return { - "session_id": session_id, - "message": "Something went wrong", - "info": { - "sources": [], - "chunkids": [], - "error": f"{error_name} :- {str(e)}", - "mode": mode - }, - "user": "chatbot" - } diff --git a/backend/src/QA_optimization.py b/backend/src/QA_optimization.py deleted file mode 100644 index 70c5ffdc8..000000000 --- a/backend/src/QA_optimization.py +++ /dev/null @@ -1,217 +0,0 @@ -import os -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain.chains import GraphCypherQAChain -from langchain.graphs import Neo4jGraph -from langchain.chains import RetrievalQA -from langchain_openai import ChatOpenAI -from langchain_openai import OpenAIEmbeddings -import logging -from langchain_community.chat_message_histories import Neo4jChatMessageHistory -import asyncio -from datetime import datetime -from dotenv import load_dotenv -load_dotenv() - -# openai_api_key = os.environ.get('OPENAI_API_KEY') -# model_version='gpt-4-0125-preview' - -class ParallelComponent: - - def __init__(self, uri, userName, password, question, session_id): - self.uri = uri - self.userName = userName - self.password = password - 
self.question = question - self.session_id = session_id - self.model_version='gpt-4-0125-preview' - self.llm = ChatOpenAI(model= self.model_version, temperature=0) - - # async def execute(self): - # tasks = [] - - # tasks.append(asyncio.create_task(self._vector_embed_results())) - # tasks.append(asyncio.create_task(self._cypher_results())) - # tasks.append(asyncio.create_task(self._get_chat_history())) - - # return await asyncio.gather(*tasks) - async def execute(self): - tasks = [ - self._vector_embed_results(), - # self._cypher_results(), ## Disabled call for cypher_results - self._get_chat_history() - ] - return await asyncio.gather(*tasks) - - async def _vector_embed_results(self): - t=datetime.now() - print("Vector embeddings start time",t) - # retrieval_query=""" - # MATCH (node)-[:__PART_OF__]->(d:Document) - # WITH d, apoc.text.join(collect(node.text),"\n----\n") as text, avg(score) as score - # RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata - # """ - retrieval_query=""" - WITH node, score, apoc.text.join([ (node)-[:__HAS_ENTITY__]->(e) | head(labels(e)) + ": "+ e.id],", ") as entities - MATCH (node)-[:__PART_OF__]->(d:__Document__) - WITH d, apoc.text.join(collect(node.text + "\n" + entities),"\n----\n") as text, avg(score) as score - RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata - """ - vector_res={} - try: - neo_db=Neo4jVector.from_existing_index( - embedding=OpenAIEmbeddings(), - url=self.uri, - username=self.userName, - password=self.password, - database="neo4j", - index_name="vector", - retrieval_query=retrieval_query, - ) - # llm = ChatOpenAI(model= model_version, temperature=0) - - qa = RetrievalQA.from_chain_type( - llm=self.llm, chain_type="stuff", retriever=neo_db.as_retriever(search_kwargs={'k': 3,"score_threshold": 0.5}), return_source_documents=True - ) - - result = qa({"query": 
self.question}) - vector_res['result']=result.get("result") - list_source_docs=[] - for i in result["source_documents"]: - list_source_docs.append(i.metadata['source']) - vector_res['source']=list_source_docs - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in vector embedding in QA component:{error_message}') - # raise Exception(error_message) - print("Vector embeddings duration time",datetime.now()-t) - return vector_res - - - async def _cypher_results(self): - try: - t=datetime.now() - print("Cypher QA start time",t) - cypher_res={} - graph = Neo4jGraph( - url=self.uri, - username=self.userName, - password=self.password - ) - - - graph.refresh_schema() - cypher_chain = GraphCypherQAChain.from_llm( - graph=graph, - cypher_llm=ChatOpenAI(temperature=0, model=self.model_version), - qa_llm=ChatOpenAI(temperature=0, model=self.model_version), - validate_cypher=True, # Validate relationship directions - verbose=True, - top_k=2 - ) - try: - cypher_res=cypher_chain.invoke({"query": question}) - except: - cypher_res={} - - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in CypherQAChain in QA component:{error_message}') - # raise Exception(error_message) - print("Cypher QA duration",datetime.now()-t) - return cypher_res - - async def _get_chat_history(self): - try: - t=datetime.now() - print("Get chat history start time:",t) - history = Neo4jChatMessageHistory( - url=self.uri, - username=self.userName, - password=self.password, - session_id=self.session_id - ) - chat_history=history.messages - - if len(chat_history)==0: - return {"result":""} - condense_template = f"""Given the following earlier conversation , Summarise the chat history.Make sure to include all the relevant information. 
- Chat History: - {chat_history}""" - chat_summary=self.llm.predict(condense_template) - print("Get chat history duration time:",datetime.now()-t) - return {"result":chat_summary} - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in retrieving chat history:{error_message}') - # raise Exception(error_message) - return {"result":''} - - async def final_prompt(self,chat_summary,cypher_res,vector_res): - t=datetime.now() - print('Final prompt start time:',t) - final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze - and synthesize information from two sources: the top result from a similarity search - (unstructured information) and relevant data from a graph database (structured information). - If structured information fails to find an answer then use the answer from unstructured information - and vice versa. I only want a straightforward answer without mentioning from which source you got the answer. You are also receiving - a chat history of the earlier conversation. You should be able to understand the context from the chat history and answer the question. - Given the user's query: {self.question}, provide a meaningful and efficient answer based - on the insights derived from the following data: - chat_summary:{chat_summary} - Structured information: {cypher_res}. - Unstructured information: {vector_res}. 
- - """ - print(final_prompt) - response = self.llm.predict(final_prompt) - ai_message=response - user_message=question - print('Final prompt duration',datetime.now()-t) - return ai_message,user_message - - - async def _save_chat_history(self,ai_message,user_message): - try: - history = Neo4jChatMessageHistory( - url=self.uri, - username=self.userName, - password=self.password, - session_id=self.session_id - ) - history.add_user_message(user_message) - history.add_ai_message(ai_message) - logging.info(f'Successfully saved chat history') - except Exception as e: - error_message = str(e) - logging.exception(f'Exception in saving chat history:{error_message}') - raise Exception(error_message) - -# Usage example: - -uri=os.environ.get('NEO4J_URI') -userName=os.environ.get('NEO4J_USERNAME') -password=os.environ.get('NEO4J_PASSWORD') -question='Do you know my name?' -session_id=2 - -async def main(uri,userName,password,question,session_id): - t=datetime.now() - parallel_component = ParallelComponent(uri, userName, password, question, session_id) - f_results=await parallel_component.execute() - print(f_results) - # f_vector_result=f_results[0]['result'] - # f_cypher_result=f_results[1].get('result','') - # f_chat_summary=f_results[2]['result'] - f_vector_result=f_results[0]['result'] - f_cypher_result = "" # Passing Empty string for cypher_result - f_chat_summary=f_results[1]['result'] - print(f_vector_result) - print(f_cypher_result) - print(f_chat_summary) - ai_message,user_message=await parallel_component.final_prompt(f_chat_summary,f_cypher_result,f_vector_result) - # print(asyncio.gather(asyncio.create_taskparallel_component.final_prompt(f_chat_summary,f_cypher_result,f_vector_result))) - await parallel_component._save_chat_history(ai_message,user_message) - print("Total Time taken:",datetime.now()-t) - print("Response from AI:",ai_message) -# Run with an event loop -asyncio.run(main(uri,userName,password,question,session_id)) \ No newline at end of file diff --git 
a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index cdbd358d5..aa8d1d4ca 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -1,24 +1,6 @@ import logging -from neo4j import graph from src.graph_query import * - -CHUNK_QUERY = """ -match (chunk:Chunk) where chunk.id IN $chunksIds - -MATCH (chunk)-[:PART_OF]->(d:Document) -CALL {WITH chunk -MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, - [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, - endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, - relationship: {type:type(r), element_id:elementId(r)}}] as entities -""" - +from src.shared.constants import * def process_records(records): """ @@ -37,10 +19,22 @@ def process_records(records): relationship = element['relationship'] if start_node['element_id'] not in seen_nodes: + if "labels" in start_node.keys(): + labels = set(start_node["labels"]) + labels.discard("__Entity__") + if not labels: + labels.add('*') + start_node["labels"] = list(labels) nodes.append(start_node) seen_nodes.add(start_node['element_id']) if end_node['element_id'] not in seen_nodes: + if "labels" in end_node.keys(): + labels = set(end_node["labels"]) + labels.discard("__Entity__") + if not labels: + labels.add('*') + end_node["labels"] = list(labels) nodes.append(end_node) seen_nodes.add(end_node['element_id']) @@ -86,8 +80,77 @@ def process_chunk_data(chunk_data): return chunk_properties except Exception as e: logging.error(f"chunkid_entities module: An error 
occurred while extracting the Chunk text from records: {e}") - -def get_entities_from_chunkids(uri, username, password, chunk_ids): + +def process_chunkids(driver, chunk_ids): + """ + Processes chunk IDs to retrieve chunk data. + """ + try: + logging.info(f"Starting graph query process for chunk ids: {chunk_ids}") + chunk_ids_list = chunk_ids.split(",") + + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + result = process_records(records) + logging.info(f"Nodes and relationships are processed") + + result["chunk_data"] = process_chunk_data(records) + logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing chunk ids: {chunk_ids}. Error: {e}") + raise + +def remove_duplicate_nodes(nodes,property="element_id"): + unique_nodes = [] + seen_element_ids = set() + + for node in nodes: + element_id = node[property] + if element_id not in seen_element_ids: + if "labels" in node.keys(): + labels = set(node["labels"]) + labels.discard("__Entity__") + if not labels: + labels.add('*') + node["labels"] = list(labels) + unique_nodes.append(node) + seen_element_ids.add(element_id) + + return unique_nodes + +def process_entityids(driver, chunk_ids): + """ + Processes entity IDs to retrieve local community data. 
+ """ + try: + logging.info(f"Starting graph query process for entity ids: {chunk_ids}") + entity_ids_list = chunk_ids.split(",") + query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format( + topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS + ) + query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX + query_body + LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX + + records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) + + result = process_records(records) + if records: + result["nodes"].extend(records[0]["nodes"]) + result["nodes"] = remove_duplicate_nodes(result["nodes"]) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = records[0]["chunks"] + result["community_data"] = records[0]["communities"] + else: + result["chunk_data"] = list() + result["community_data"] = list() + logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing entity ids: {chunk_ids}. Error: {e}") + raise + +def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False): """ Retrieve and process nodes and relationships from a graph database given a list of chunk IDs. @@ -101,24 +164,32 @@ def get_entities_from_chunkids(uri, username, password, chunk_ids): dict: A dictionary with 'nodes' and 'relationships' keys containing processed data, or an error message. 
""" try: - logging.info(f"Starting graph query process for chunk ids") - if chunk_ids: - chunk_ids_list = chunk_ids.split(",") - driver = get_graphDB_driver(uri, username, password) - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) - result = process_records(records) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = process_chunk_data(records) - logging.info(f"Query process completed successfully for chunk ids") + + driver = get_graphDB_driver(uri, username, password,database) + if not is_entity: + if chunk_ids: + logging.info(f"chunkid_entities module: Starting for chunk ids : {chunk_ids}") + result = process_chunkids(driver,chunk_ids) + else: + logging.info(f"chunkid_entities module: No chunk ids are passed") + result = { + "nodes": [], + "relationships": [], + "chunk_data":[] + } return result + if chunk_ids: + result = process_entityids(driver,chunk_ids) + logging.info(f"chunkid_entities module: Starting for entity ids : {chunk_ids}") else: - logging.info(f"chunkid_entities module: No chunk ids are passed") + logging.info(f"chunkid_entities module: No entity ids are passed") result = { "nodes": [], "relationships": [], - "chunk_data":[] + "chunk_data":[], + "community_data":[] } - return result + return result except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. 
Error: {str(e)}") diff --git a/backend/src/communities.py b/backend/src/communities.py new file mode 100644 index 000000000..7086a4786 --- /dev/null +++ b/backend/src/communities.py @@ -0,0 +1,408 @@ +import logging +from graphdatascience import GraphDataScience +from src.llm import get_llm +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from concurrent.futures import ThreadPoolExecutor, as_completed +import os +from src.shared.common_fn import load_embedding_model + + +COMMUNITY_PROJECTION_NAME = "communities" +NODE_PROJECTION = "!Chunk&!Document&!__Community__" +NODE_PROJECTION_ENTITY = "__Entity__" +MAX_WORKERS = 10 + + +CREATE_COMMUNITY_GRAPH_PROJECTION = """ +MATCH (source:{node_projection})-[]->(target:{node_projection}) +WITH source, target, count(*) as weight +WITH gds.graph.project( + '{project_name}', + source, + target, + {{ + relationshipProperties: {{ weight: weight }} + }}, + {{undirectedRelationshipTypes: ['*']}} + ) AS g +RETURN + g.graphName AS graph_name, g.nodeCount AS nodes, g.relationshipCount AS rels +""" + +CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;" +CREATE_COMMUNITY_LEVELS = """ +MATCH (e:`__Entity__`) +WHERE e.communities is NOT NULL +UNWIND range(0, size(e.communities) - 1 , 1) AS index +CALL { + WITH e, index + WITH e, index + WHERE index = 0 + MERGE (c:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])}) + ON CREATE SET c.level = index + MERGE (e)-[:IN_COMMUNITY]->(c) + RETURN count(*) AS count_0 +} +CALL { + WITH e, index + WITH e, index + WHERE index > 0 + MERGE (current:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])}) + ON CREATE SET current.level = index + MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])}) + ON CREATE SET previous.level = index - 1 + MERGE (previous)-[:PARENT_COMMUNITY]->(current) + 
RETURN count(*) AS count_1 +} +RETURN count(*) +""" +CREATE_COMMUNITY_RANKS = """ +MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[HAS_ENTITY]-(:Chunk)<-[]-(d:Document) +WITH c, count(distinct d) AS rank +SET c.community_rank = rank; +""" + +CREATE_PARENT_COMMUNITY_RANKS = """ +MATCH (c:__Community__)<-[:PARENT_COMMUNITY*]-(:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[HAS_ENTITY]-(:Chunk)<-[]-(d:Document) +WITH c, count(distinct d) AS rank +SET c.community_rank = rank; +""" + +CREATE_COMMUNITY_WEIGHTS = """ +MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c) +WITH n, count(distinct c) AS chunkCount +SET n.weight = chunkCount +""" +CREATE_PARENT_COMMUNITY_WEIGHTS = """ +MATCH (n:`__Community__`)<-[:PARENT_COMMUNITY*]-(:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c) +WITH n, count(distinct c) AS chunkCount +SET n.weight = chunkCount +""" + +GET_COMMUNITY_INFO = """ +MATCH (c:`__Community__`)<-[:IN_COMMUNITY]-(e) +WHERE c.level = 0 +WITH c, collect(e) AS nodes +WHERE size(nodes) > 1 +CALL apoc.path.subgraphAll(nodes[0], { + whitelistNodes:nodes +}) +YIELD relationships +RETURN c.id AS communityId, + [n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes, + [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id}] AS rels +""" + +GET_PARENT_COMMUNITY_INFO = """ +MATCH (p:`__Community__`)<-[:PARENT_COMMUNITY*]-(c:`__Community__`) +WHERE p.summary is null and c.summary is not null +RETURN p.id as communityId, collect(c.summary) as texts +""" + + +STORE_COMMUNITY_SUMMARIES = """ +UNWIND $data AS row +MERGE (c:__Community__ {id:row.community}) +SET c.summary = row.summary +""" + +COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No pre-amble." 
+ +COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, +generate a natural language summary of the provided information: +{community_info} + +Summary:""" + +PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information" + +PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community, +generate a natural language summary of the information.Include all the necessary information as possible +{community_info} + +Summary:""" + + +GET_COMMUNITY_DETAILS = """ +MATCH (c:`__Community__`) +WHERE c.embedding IS NULL AND c.summary IS NOT NULL +RETURN c.id as communityId, c.summary as text +""" + +WRITE_COMMUNITY_EMBEDDINGS = """ +UNWIND $rows AS row +MATCH (c) WHERE c.id = row.communityId +CALL db.create.setNodeVectorProperty(c, "embedding", row.embedding) +""" + +DROP_COMMUNITIES = "MATCH (c:`__Community__`) DETACH DELETE c" +DROP_COMMUNITY_PROPERTY = "MATCH (e:`__Entity__`) REMOVE e.communities" + + +ENTITY_VECTOR_INDEX_NAME = "entity_vector" +ENTITY_VECTOR_EMBEDDING_DIMENSION = 384 + +CREATE_ENTITY_VECTOR_INDEX = """ +CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (e:__Entity__) ON e.embedding +OPTIONS {{ + indexConfig: {{ + `vector.dimensions`: {embedding_dimension}, + `vector.similarity_function`: 'cosine' + }} +}} +""" + + +def get_gds_driver(uri, username, password, database): + try: + gds = GraphDataScience( + endpoint=uri, + auth=(username, password), + database=database + ) + logging.info("Successfully created GDS driver.") + return gds + except Exception as e: + logging.error(f"Failed to create GDS driver: {e}") + raise + +def create_community_graph_projection(gds, project_name=COMMUNITY_PROJECTION_NAME, node_projection=NODE_PROJECTION): + try: + existing_projects = gds.graph.list() + project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any() + + if 
project_exists: + logging.info(f"Projection '{project_name}' already exists. Dropping it.") + gds.graph.drop(project_name) + + logging.info(f"Creating new graph project '{project_name}'.") + projection_query = CREATE_COMMUNITY_GRAPH_PROJECTION.format(node_projection=node_projection,project_name=project_name) + graph_projection_result = gds.run_cypher(projection_query) + projection_result = graph_projection_result.to_dict(orient="records")[0] + logging.info(f"Graph projection '{projection_result['graph_name']}' created successfully with {projection_result['nodes']} nodes and {projection_result['rels']} relationships.") + graph_project = gds.graph.get(projection_result['graph_name']) + return graph_project + except Exception as e: + logging.error(f"Failed to create community graph project: {e}") + raise + +def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME): + try: + logging.info(f"Writing communities to the graph project '{project_name}'.") + gds.leiden.write( + graph_project, + writeProperty=project_name, + includeIntermediateCommunities=True, + relationshipWeightProperty="weight" + ) + logging.info("Communities written successfully.") + return True + except Exception as e: + logging.error(f"Failed to write communities: {e}") + return False + + +def get_community_chain(model, is_parent=False,community_template=COMMUNITY_TEMPLATE,system_template=COMMUNITY_SYSTEM_TEMPLATE): + try: + if is_parent: + community_template=PARENT_COMMUNITY_TEMPLATE + system_template= PARENT_COMMUNITY_SYSTEM_TEMPLATE + llm, model_name = get_llm(model) + community_prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + system_template, + ), + ("human", community_template), + ] + ) + + community_chain = community_prompt | llm | StrOutputParser() + return community_chain + except Exception as e: + logging.error(f"Failed to create community chain: {e}") + raise + +def prepare_string(community_data): + try: + nodes_description = "Nodes are:\n" + for node in 
community_data['nodes']: + node_id = node['id'] + node_type = node['type'] + node_description = f", description: {node['description']}" if 'description' in node and node['description'] else "" + nodes_description += f"id: {node_id}, type: {node_type}{node_description}\n" + + relationships_description = "Relationships are:\n" + for rel in community_data['rels']: + start_node = rel['start'] + end_node = rel['end'] + relationship_type = rel['type'] + relationship_description = f", description: {rel['description']}" if 'description' in rel and rel['description'] else "" + relationships_description += f"({start_node})-[:{relationship_type}]->({end_node}){relationship_description}\n" + return nodes_description + "\n" + relationships_description + except Exception as e: + logging.error(f"Failed to prepare string from community data: {e}") + raise + +def process_community_info(community, chain, is_parent=False): + try: + if is_parent: + combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", []))) + else: + combined_text = prepare_string(community) + summary = chain.invoke({'community_info': combined_text}) + return {"community": community['communityId'], "summary": summary} + except Exception as e: + logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") + return None + +def create_community_summaries(gds, model): + try: + community_info_list = gds.run_cypher(GET_COMMUNITY_INFO) + community_chain = get_community_chain(model) + + summaries = [] + with ThreadPoolExecutor() as executor: + futures = [executor.submit(process_community_info, community, community_chain) for community in community_info_list.to_dict(orient="records")] + + for future in as_completed(futures): + result = future.result() + if result: + summaries.append(result) + else: + logging.error("community summaries could not be processed.") + + gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": summaries}) + + 
parent_community_info = gds.run_cypher(GET_PARENT_COMMUNITY_INFO) + parent_community_chain = get_community_chain(model, is_parent=True) + + parent_summaries = [] + with ThreadPoolExecutor() as executor: + futures = [executor.submit(process_community_info, community, parent_community_chain, is_parent=True) for community in parent_community_info.to_dict(orient="records")] + + for future in as_completed(futures): + result = future.result() + if result: + parent_summaries.append(result) + else: + logging.error("parent community summaries could not be processed.") + + gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": parent_summaries}) + + except Exception as e: + logging.error(f"Failed to create community summaries: {e}") + raise + +def create_community_embeddings(gds): + try: + embedding_model = os.getenv('EMBEDDING_MODEL') + embeddings, dimension = load_embedding_model(embedding_model) + logging.info(f"Embedding model '{embedding_model}' loaded successfully.") + + logging.info("Fetching community details.") + rows = gds.run_cypher(GET_COMMUNITY_DETAILS) + rows = rows[['communityId', 'text']].to_dict(orient='records') + logging.info(f"Fetched {len(rows)} communities.") + + batch_size = 100 + for i in range(0, len(rows), batch_size): + batch_rows = rows[i:i+batch_size] + for row in batch_rows: + try: + row['embedding'] = embeddings.embed_query(row['text']) + except Exception as e: + logging.error(f"Failed to embed text for community ID {row['communityId']}: {e}") + row['embedding'] = None + + try: + logging.info("Writing embeddings to the database.") + gds.run_cypher(WRITE_COMMUNITY_EMBEDDINGS, params={'rows': batch_rows}) + logging.info("Embeddings written successfully.") + except Exception as e: + logging.error(f"Failed to write embeddings to the database: {e}") + continue + return dimension + except Exception as e: + logging.error(f"An error occurred during the community embedding process: {e}") + +def create_entity_vector_index(gds, 
embedding_dimension=ENTITY_VECTOR_EMBEDDING_DIMENSION): + query = CREATE_ENTITY_VECTOR_INDEX.format( + index_name=ENTITY_VECTOR_INDEX_NAME, + embedding_dimension=embedding_dimension + ) + try: + logging.info(f"Running Cypher query to create entity vector index: {query}") + gds.run_cypher(query) + logging.info("Entity vector index created successfully.") + except Exception as e: + logging.error(f"Error occurred while creating entity vector index: {e}", exc_info=True) + +def create_community_properties(gds, model): + commands = [ + (CREATE_COMMUNITY_CONSTRAINT, "created community constraint to the graph."), + (CREATE_COMMUNITY_LEVELS, "Successfully created community levels."), + (CREATE_COMMUNITY_RANKS, "Successfully created community ranks."), + (CREATE_PARENT_COMMUNITY_RANKS, "Successfully created parent community ranks."), + (CREATE_COMMUNITY_WEIGHTS, "Successfully created community weights."), + (CREATE_PARENT_COMMUNITY_WEIGHTS, "Successfully created parent community weights."), + ] + try: + for command, message in commands: + gds.run_cypher(command) + logging.info(message) + + create_community_summaries(gds, model) + logging.info("Successfully created community summaries.") + + embedding_dimension = create_community_embeddings(gds) + logging.info("Successfully created community embeddings.") + + create_entity_vector_index(gds,embedding_dimension=embedding_dimension) + logging.info("Successfully created Entity Vector Index.") + + except Exception as e: + logging.error(f"Error during community properties creation: {e}") + raise + + +def clear_communities(gds): + try: + logging.info("Starting to clear communities.") + + logging.info("Dropping communities...") + gds.run_cypher(DROP_COMMUNITIES) + logging.info(f"Communities dropped successfully") + + logging.info("Dropping community property from entities...") + gds.run_cypher(DROP_COMMUNITY_PROPERTY) + logging.info(f"Community property dropped successfully") + + except Exception as e: + logging.error(f"An error 
occurred while clearing communities: {e}") + raise + + +def create_communities(uri, username, password, database,model): + try: + gds = get_gds_driver(uri, username, password, database) + clear_communities(gds) + + graph_project = create_community_graph_projection(gds) + write_communities_sucess = write_communities(gds, graph_project) + if write_communities_sucess: + logging.info("Starting Community properties creation process.") + create_community_properties(gds,model) + logging.info("Communities creation process completed successfully.") + else: + logging.warning("Failed to write communities. Constraint was not applied.") + except Exception as e: + logging.error(f"Failed to create communities: {e}") + + + + + + diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 36a9a2925..279451bdf 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -140,6 +140,27 @@ def update_KNN_graph(self): ) else: logging.info("Vector index does not exist, So KNN graph not update") + + def check_gds_version(self): + try: + gds_procedure_count = """ + SHOW PROCEDURES + YIELD name + WHERE name STARTS WITH "gds." 
+ RETURN COUNT(*) AS totalGdsProcedures + """ + result = self.graph.query(gds_procedure_count) + total_gds_procedures = result[0]['totalGdsProcedures'] if result else 0 + + if total_gds_procedures > 0: + logging.info("GDS is available in the database.") + return True + else: + logging.info("GDS is not available in the database.") + return False + except Exception as e: + logging.error(f"An error occurred while checking GDS version: {e}") + return False def connection_check_and_get_vector_dimensions(self): """ @@ -166,19 +187,20 @@ def connection_check_and_get_vector_dimensions(self): embedding_model = os.getenv('EMBEDDING_MODEL') embeddings, application_dimension = load_embedding_model(embedding_model) logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}') - # print(chunks_exists) + + gds_status = self.check_gds_version() if self.graph: if len(db_vector_dimension) > 0: - return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful"} + return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status} else: if len(db_vector_dimension) == 0 and len(result_chunks) == 0: logging.info("Chunks and vector index does not exists in database") - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False,"gds_status":gds_status} elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0: - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True} + return {'db_vector_dimension': 0, 
'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True,"gds_status":gds_status} else: - return {'message':"Connection Successful"} + return {'message':"Connection Successful","gds_status":gds_status} def execute_query(self, query, param=None): return self.graph.query(query, param) diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 5468fe2c3..fb7333b48 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -8,7 +8,7 @@ # watch("neo4j") -def get_graphDB_driver(uri, username, password): +def get_graphDB_driver(uri, username, password,database="neo4j"): """ Creates and returns a Neo4j database driver instance configured with the provided credentials. @@ -20,9 +20,9 @@ def get_graphDB_driver(uri, username, password): logging.info(f"Attempting to connect to the Neo4j database at {uri}") enable_user_agent = os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") if enable_user_agent: - driver = GraphDatabase.driver(uri, auth=(username, password), user_agent=os.environ.get('NEO4J_USER_AGENT')) + driver = GraphDatabase.driver(uri, auth=(username, password),database=database, user_agent=os.environ.get('NEO4J_USER_AGENT')) else: - driver = GraphDatabase.driver(uri, auth=(username, password)) + driver = GraphDatabase.driver(uri, auth=(username, password),database=database) logging.info("Connection successful") return driver except Exception as e: @@ -61,15 +61,20 @@ def process_node(node): with datetime objects formatted as ISO strings. 
""" try: + labels = set(node.labels) + labels.discard("__Entity__") + if not labels: + labels.add('*') + node_element = { "element_id": node.element_id, - "labels": list(node.labels), + "labels": list(labels), "properties": {} } # logging.info(f"Processing node with element ID: {node.element_id}") for key in node: - if key in ["embedding", "text"]: + if key in ["embedding", "text", "summary"]: continue value = node.get(key) if isinstance(value, time.DateTime): @@ -178,7 +183,7 @@ def get_completed_documents(driver): return documents -def get_graph_results(uri, username, password,document_names): +def get_graph_results(uri, username, password,database,document_names): """ Retrieves graph data by executing a specified Cypher query using credentials and parameters provided. Processes the results to extract nodes and relationships and packages them in a structured output. @@ -195,7 +200,7 @@ def get_graph_results(uri, username, password,document_names): """ try: logging.info(f"Starting graph query process") - driver = get_graphDB_driver(uri, username, password) + driver = get_graphDB_driver(uri, username, password,database) document_names= list(map(str.strip, json.loads(document_names))) query = GRAPH_QUERY.format(graph_chunk_limit=GRAPH_CHUNK_LIMIT) records, summary , keys = execute_query(driver, query.strip(), document_names) diff --git a/backend/src/llm.py b/backend/src/llm.py index 0ee61b650..505bb89fb 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -153,9 +153,11 @@ def get_graph_document_list( node_properties = False else: node_properties = ["description"] + relationship_properties = ["description"] llm_transformer = LLMGraphTransformer( llm=llm, node_properties=node_properties, + relationship_properties=relationship_properties, allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, ) diff --git a/backend/src/main.py b/backend/src/main.py index 7bf7c17bd..8bae03809 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -325,7 
+325,6 @@ def processing_source(uri, userName, password, database, model, file_name, pages # selected_chunks = [] is_cancelled_status = False job_status = "Completed" - for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): select_chunks_upto = i+update_graph_chunk_processed logging.info(f'Selected Chunks upto: {select_chunks_upto}') diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 4783c5abe..1ea2729e5 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -9,6 +9,9 @@ logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO') +EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') +EMBEDDING_FUNCTION , EMBEDDING_DIMENSION = load_embedding_model(EMBEDDING_MODEL) + def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_documents_chunk_chunk_Id : list): batch_data = [] logging.info("Create HAS_ENTITY relationship between chunks and entities") @@ -39,9 +42,9 @@ def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_docume def update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name): #create embedding isEmbedding = os.getenv('IS_EMBEDDING') - embedding_model = os.getenv('EMBEDDING_MODEL') + # embedding_model = os.getenv('EMBEDDING_MODEL') - embeddings, dimension = load_embedding_model(embedding_model) + embeddings, dimension = EMBEDDING_FUNCTION , EMBEDDING_DIMENSION logging.info(f'embedding model:{embeddings} and dimesion:{dimension}') data_for_query = [] logging.info(f"update embedding and vector index for chunks") diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index fa582e107..7d038c61b 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -8,7 +8,7 @@ DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;" LABELS_QUERY = "CALL db.labels()" FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];" -FILTER_LABELS 
= ["Chunk","Document"] +FILTER_LABELS = ["Chunk","Document","__Community__"] HYBRID_SEARCH_INDEX_DROP_QUERY = "DROP INDEX keyword IF EXISTS;" @@ -80,7 +80,7 @@ def create_entity_embedding(graph:Neo4jGraph): def fetch_entities_for_embedding(graph): query = """ MATCH (e) - WHERE NOT (e:Chunk OR e:Document) AND e.embedding IS NULL AND e.id IS NOT NULL + WHERE NOT (e:Chunk OR e:Document OR e:`__Community__`) AND e.embedding IS NULL AND e.id IS NOT NULL RETURN elementId(e) AS elementId, e.id + " " + coalesce(e.description, "") AS text """ result = graph.query(query) diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index b0dbe2f5f..6d24912c7 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str): return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - # graph.add_graph_documents(graph_document_list, baseEntityLabel=True) - graph.add_graph_documents(graph_document_list) + graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + # graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): for graph_document in graph_document_list: diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index c6d404a7f..7856dbec2 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -16,6 +16,7 @@ PROJECT_ID = 'llm-experiments-387609' GRAPH_CHUNK_LIMIT = 50 + #query GRAPH_QUERY = """ MATCH docs = (d:Document) @@ -38,15 +39,26 @@ CALL {{ WITH selectedChunks UNWIND selectedChunks as c - + OPTIONAL MATCH entities=(c:Chunk)-[:HAS_ENTITY]->(e) OPTIONAL MATCH entityRels=(e)--(e2:!Chunk) WHERE exists {{ (e2)<-[:HAS_ENTITY]-(other) WHERE other IN selectedChunks }} - RETURN collect(entities) as entities, collect(entityRels) as entityRels + 
RETURN entities , entityRels, collect(DISTINCT e) as entity }} +WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels, entity -WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels, true) as paths +WITH * + +CALL {{ + with entity + unwind entity as n + OPTIONAL MATCH community=(n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) + OPTIONAL MATCH parentcommunity=(p)-[:PARENT_COMMUNITY*]->(p2:__Community__) + return collect(community) as communities , collect(parentcommunity) as parentCommunities +}} + +WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) as paths // distinct nodes and rels CALL {{ WITH paths UNWIND paths AS path UNWIND nodes(path) as node WITH distinct node @@ -56,9 +68,26 @@ """ +CHUNK_QUERY = """ +match (chunk:Chunk) where chunk.id IN $chunksIds + +MATCH (chunk)-[:PART_OF]->(d:Document) +CALL {WITH chunk +MATCH (chunk)-[:HAS_ENTITY]->(e) +MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document &! 
`__Community__`) +UNWIND rels as r +RETURN collect(distinct r) as rels +} +WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels +RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, + [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, + endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, + relationship: {type:type(r), element_id:elementId(r)}}] as entities +""" + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 -CHAT_SEARCH_KWARG_K = 3 +CHAT_SEARCH_KWARG_K = 10 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 CHAT_DOC_SPLIT_SIZE = 3000 CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 @@ -69,6 +98,12 @@ } +CHAT_TOKEN_CUT_OFF = { + ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, + ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, + ("ollama_llama3") : 2 +} + ### CHAT TEMPLATES CHAT_SYSTEM_TEMPLATE = """ You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources. @@ -112,10 +147,8 @@ Note: This system does not generate answers based solely on internal knowledge. It answers from the information provided in the user's current and previous inputs, and from the context. """ - QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." 
- ## CHAT QUERIES VECTOR_SEARCH_QUERY = """ WITH node AS chunk, score @@ -130,88 +163,6 @@ {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} as metadata """ -# VECTOR_GRAPH_SEARCH_QUERY=""" -# WITH node as chunk, score -# MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -# CALL { WITH chunk -# MATCH (chunk)-[:__HAS_ENTITY__]->(e) -# MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,2}(:!__Chunk__&!__Document__) -# UNWIND rels as r -# RETURN collect(distinct r) as rels -# } -# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -# WITH d, avg_score, -# [c IN chunks | c.chunk.text] AS texts, -# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, -# [r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities -# WITH d, avg_score,chunkdetails, -# apoc.text.join(texts,"\n----\n") + -# apoc.text.join(entities,"\n") -# as text -# RETURN text, avg_score AS score, {source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata -# """ - - -# VECTOR_GRAPH_SEARCH_QUERY = """ -# WITH node as chunk, score -# // find the document of the chunk -# MATCH (chunk)-[:__PART_OF__]->(d:__Document__) -# // fetch entities -# CALL { WITH chunk -# // entities connected to the chunk -# // todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks -# MATCH (chunk)-[:__HAS_ENTITY__]->(e) - -# // depending on match to query embedding either 1 or 2 step expansion -# WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95 -# THEN -# collect { MATCH 
path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,1}(:!Chunk&!__Document__) RETURN path } -# ELSE -# collect { MATCH path=(e)(()-[rels:!__HAS_ENTITY__&!__PART_OF__]-()){0,2}(:!Chunk&!__Document__) RETURN path } -# END as paths - -# RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels, -# collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes -# } -# // aggregate chunk-details and de-duplicate nodes and relationships -# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels, - -# // TODO sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes? -# apoc.coll.toSet(apoc.coll.flatten(collect( -# [r in rels |[startNode(r),endNode(r)]]),true)) as nodes - -# // generate metadata and text components for chunks, nodes and relationships -# WITH d, avg_score, -# [c IN chunks | c.chunk.text] AS texts, -# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, -# apoc.coll.sort([n in nodes | - -# coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+ -# n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts, -# apoc.coll.sort([r in rels -# // optional filter if we limit the node-set -# // WHERE startNode(r) in nodes AND endNode(r) in nodes -# | -# coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ -# startNode(r).id + -# " " + type(r) + " " + -# coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + -# endNode(r).id -# ]) as relTexts - -# // combine texts into response-text -# WITH d, avg_score,chunkdetails, -# "Text Content:\n" + -# apoc.text.join(texts,"\n----\n") + -# "\n----\nEntities:\n"+ -# apoc.text.join(nodeTexts,"\n") + -# "\n----\nRelationships:\n"+ -# apoc.text.join(relTexts,"\n") - -# as text -# RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN 
d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata -# """ - VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25 VECTOR_GRAPH_SEARCH_QUERY = """ @@ -276,6 +227,118 @@ RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata """ + +### Local community search +LOCAL_COMMUNITY_TOP_K = 10 +LOCAL_COMMUNITY_TOP_CHUNKS = 3 +LOCAL_COMMUNITY_TOP_COMMUNITIES = 3 +LOCAL_COMMUNITY_TOP_OUTSIDE_RELS = 10 + +LOCAL_COMMUNITY_SEARCH_QUERY = """ +WITH collect(node) as nodes, avg(score) as score, collect({{id: elementId(node), score:score}}) as metadata +WITH score, nodes,metadata, +collect {{ + UNWIND nodes as n + MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) + WITH c, count(distinct n) as freq + RETURN c + ORDER BY freq DESC + LIMIT {topChunks} +}} AS chunks, +collect {{ + UNWIND nodes as n + MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) + WITH c, c.community_rank as rank, c.weight AS weight + RETURN c + ORDER BY rank, weight DESC + LIMIT {topCommunities} +}} AS communities, +collect {{ + unwind nodes as n + unwind nodes as m + match (n)-[r]->(m) + RETURN distinct r + // todo should we have a limit here? 
+}} as rels, +collect {{ + unwind nodes as n + match path = (n)-[r]-(m:__Entity__) + where not m in nodes + with m, collect(distinct r) as rels, count(*) as freq + ORDER BY freq DESC LIMIT {topOutsideRels} + WITH collect(m) as outsideNodes, apoc.coll.flatten(collect(rels)) as rels + RETURN {{ nodes: outsideNodes, rels: rels }} +}} as outside +""" + +LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX = """ +RETURN {chunks: [c in chunks | c.text], + communities: [c in communities | c.summary], + entities: [n in nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+ n.id + " " + coalesce(n.description, "")], + relationships: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id], + outside: { + nodes: [n in outside[0].nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+n.id + " " + coalesce(n.description, "")], + relationships: [r in outside[0].rels | apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+startNode(r).id+" "+type(r)+" "+apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+endNode(r).id]} + } AS text, score, {entities:metadata} as metadata +""" + +LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX = """ +UNWIND $entityIds as id +MATCH (node) WHERE elementId(node) = id +WITH node, 1.0 as score +""" +LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ +WITH * +UNWIND chunks as c +MATCH (c)-[:PART_OF]->(d:Document) +RETURN [c {.*, embedding:null, fileName:d.fileName, fileSource:d.fileSource}] as chunks, +[community in communities | community {.*, embedding:null}] as communities, +[node in nodes+outside[0].nodes | {element_id:elementId(node), labels:labels(node), properties:{id:node.id,description:node.description}}] as nodes, +[r in rels+outside[0].rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, + endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, + 
relationship: {type:type(r), element_id:elementId(r)}}] as entities +""" + +CHAT_MODE_CONFIG_MAP= { + "vector": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "fulltext": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + }, + "entity search+vector": { + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX, + "index_name": "entity_vector", + "keyword_index": None, + "document_filter": False + }, + "graph+vector": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": None, + "document_filter": True + }, + "graph+vector+fulltext": { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + "index_name": "vector", + "keyword_index": "keyword", + "document_filter": False + }, + "default": { + "retrieval_query": VECTOR_SEARCH_QUERY, + "index_name": "vector", + "keyword_index": None, + "document_filter": True + } + } YOUTUBE_CHUNK_SIZE_SECONDS = 60 QUERY_TO_GET_CHUNKS = """ diff --git a/experiments/graphrag.ipynb b/experiments/graphrag.ipynb new file mode 100644 index 000000000..a3f99796a --- /dev/null +++ b/experiments/graphrag.ipynb @@ -0,0 +1,3519 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install --quiet --upgrade langchain-community langchain-experimental langchain-openai neo4j graphdatascience tiktoken retry" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/workspaces/llm-graph-builder/chatbotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "from langchain_community.graphs import Neo4jGraph\n", + "from graphdatascience import GraphDataScience\n", + "import pandas as pd\n", + "import numpy as np\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from src.llm import get_llm\n", + "\n", + "import os\n", + "import getpass\n", + "from neo4j import GraphDatabase, Result\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import tiktoken\n", + "import numpy as np\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_community.vectorstores import Neo4jVector\n", + "from langchain_community.graphs import Neo4jGraph\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "# from llama_index.core.schema import TextNode\n", + "# from llama_index.core.vector_stores.utils import node_to_metadata_dict\n", + "# from llama_index.vector_stores.neo4jvector import Neo4jVectorStore\n", + "# from llama_index.core import VectorStoreIndex\n", + "from tqdm import tqdm\n", + "from src.shared.common_fn import load_embedding_model\n", + "\n", + "\n", + "from typing import Dict, Any\n", + "from concurrent.futures import ThreadPoolExecutor, as_completed\n", + "from tqdm import tqdm\n", + "from src.llm import get_llm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"NEO4J_URI\"] = \"\"\n", + "os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n", + "os.environ[\"NEO4J_PASSWORD\"] = 
\"\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('advertisedListenAddress' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n", + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('serverLocation' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n" + ] + } + ], + "source": [ + "gds = GraphDataScience(\n", + " os.environ[\"NEO4J_URI\"],\n", + " auth=(os.environ[\"NEO4J_USERNAME\"], os.environ[\"NEO4J_PASSWORD\"])\n", + ")\n", + "\n", + "graph = Neo4jGraph() \n", + "\n", + "driver = GraphDatabase.driver(os.environ[\"NEO4J_URI\"], auth=(os.environ[\"NEO4J_USERNAME\"], os.environ[\"NEO4J_PASSWORD\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'type': 'node', 'total_count': 185, 'non_null_descriptions': 146},\n", + " {'type': 'relationship', 'total_count': 14614, 'non_null_descriptions': 41}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"\"\"\n", + "MATCH (n:`__Entity__`)\n", + "RETURN \"node\" AS type,\n", + " count(*) AS total_count,\n", + " count(n.description) AS 
non_null_descriptions\n", + "UNION ALL\n", + "MATCH (n)-[r:!MENTIONS]->()\n", + "RETURN \"relationship\" AS type,\n", + " count(*) AS total_count,\n", + " count(r.description) AS non_null_descriptions\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('schema' returned by 'gds.graph.list' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.list()'\n" + ] + } + ], + "source": [ + "available_graphs = gds.graph.list()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "available_graphs.drop([0,1],inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    degreeDistributiongraphNamedatabasedatabaseLocationmemoryUsagesizeInBytesnodeCountrelationshipCountconfigurationdensitycreationTimemodificationTimeschemaschemaWithOrientation
    \n", + "
    " + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [degreeDistribution, graphName, database, databaseLocation, memoryUsage, sizeInBytes, nodeCount, relationshipCount, configuration, density, creationTime, modificationTime, schema, schemaWithOrientation]\n", + "Index: []" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "available_graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "available_graphs[\"graphName\"].str.contains('communities', regex=False).any()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. 
('schema' returned by 'gds.graph.drop' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.drop($graph_name, $fail_if_missing, $db_name)'\n" + ] + }, + { + "data": { + "text/plain": [ + "graphName communities\n", + "database neo4j\n", + "databaseLocation local\n", + "memoryUsage \n", + "sizeInBytes -1\n", + "nodeCount 185\n", + "relationshipCount 366\n", + "configuration {'relationshipProjection': {'_ALL_': {'aggrega...\n", + "density 0.010752\n", + "creationTime 2024-08-19T12:41:43.997435629+00:00\n", + "modificationTime 2024-08-19T12:41:44.127124307+00:00\n", + "schema {'graphProperties': {}, 'nodes': {'__Entity__'...\n", + "schemaWithOrientation {'graphProperties': {}, 'nodes': {'__Entity__'...\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gds.graph.drop(\"communities\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "G, result = gds.graph.project(\n", + " \"communities\", # Graph name\n", + " \"__Entity__\", # Node projection\n", + " {\n", + " \"_ALL_\": {\n", + " \"type\": \"*\",\n", + " \"orientation\": \"UNDIRECTED\",\n", + " \"properties\": {\"weight\": {\"property\": \"*\", \"aggregation\": \"COUNT\"}},\n", + " }\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nodeProjection {'__Entity__': {'label': '__Entity__', 'proper...\n", + "relationshipProjection {'_ALL_': {'aggregation': 'DEFAULT', 'orientat...\n", + "graphName communities\n", + "nodeCount 185\n", + "relationshipCount 366\n", + "projectMillis 15\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('schema' returned by 'gds.graph.list' is deprecated.)} {position: line: 1, column: 106, offset: 105} for query: 'CALL gds.graph.list($graph_name) YIELD graphName, nodeCount, relationshipCount, database, configuration, schema, memoryUsage'\n" + ] + }, + { + "data": { + "text/plain": [ + "Graph({'graphName': 'communities', 'nodeCount': 185, 'relationshipCount': 366, 'database': 'neo4j', 'configuration': {'relationshipProjection': {'_ALL_': {'aggregation': 'DEFAULT', 'orientation': 'UNDIRECTED', 'indexInverse': False, 'properties': {'weight': {'aggregation': 'COUNT', 'property': '*', 'defaultValue': None}}, 'type': '*'}}, 'readConcurrency': 4, 'relationshipProperties': {}, 'nodeProperties': {}, 'jobId': 'ced0b057-a717-48d8-8b90-842f6f2aa001', 'nodeProjection': {'__Entity__': {'label': '__Entity__', 'properties': {}}}, 'logProgress': True, 'creationTime': neo4j.time.DateTime(2024, 8, 19, 13, 10, 3, 641071700, tzinfo=), 'validateRelationships': False, 'sudo': False}, 'schema': {'graphProperties': {}, 'nodes': {'__Entity__': {}}, 'relationships': {'_ALL_': {'weight': 'Float (DefaultValue(NaN), PERSISTENT, Aggregation.COUNT)'}}}, 'memoryUsage': '2316 KiB'})" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'COMMUNITY_PROJECT_NAME'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"community_project_name\".upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Component count: 14\n", + "Component distribution: {'min': 1, 'p5': 1, 'max': 155, 'p999': 155, 'p99': 155, 'p1': 1, 'p10': 1, 'p90': 3, 'p50': 1, 'p25': 1, 'p75': 2, 'p95': 155, 'mean': 12.285714285714286}\n" + ] + } + ], + "source": [ + "wcc = gds.wcc.stats(G)\n", + "print(f\"Component count: {wcc['componentCount']}\")\n", + "print(f\"Component distribution: {wcc['componentDistribution']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "writeMillis 5\n", + "nodePropertiesWritten 172\n", + "ranLevels 2\n", + "didConverge True\n", + "nodeCount 172\n", + "communityCount 22\n", + "communityDistribution {'min': 1, 'p5': 1, 'max': 64, 'p999': 64, 'p9...\n", + "modularity 0.678705\n", + "modularities [0.6566821889989846, 0.6787052274080039]\n", + "postProcessingMillis 3\n", + "preProcessingMillis 0\n", + "computeMillis 33\n", + "configuration {'writeProperty': 'communities', 'theta': 0.01...\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gds.leiden.write(\n", + " G,\n", + " writeProperty=\"communities\",\n", + " includeIntermediateCommunities=True,\n", + " relationshipWeightProperty=\"weight\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'count(*)': 344}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"graph.query(\"\"\"\n", + "MATCH (e:`__Entity__`)\n", + "UNWIND range(0, size(e.communities) - 1 , 1) AS index\n", + "CALL {\n", + " WITH e, index\n", + " WITH e, index\n", + " WHERE index = 0\n", + " MERGE (c:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])})\n", + " ON CREATE SET c.level = index\n", + " MERGE (e)-[:IN_COMMUNITY]->(c)\n", + " RETURN count(*) AS count_0\n", + "}\n", + "CALL {\n", + " WITH e, index\n", + " WITH e, index\n", + " WHERE index > 0\n", + " MERGE (current:`__Community__` {id: toString(index) + '-' + toString(e.communities[index])})\n", + " ON CREATE SET current.level = index\n", + " MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])})\n", + " ON CREATE SET previous.level = index - 1\n", + " MERGE (previous)-[:IN_COMMUNITY]->(current)\n", + " RETURN count(*) AS count_1\n", + "}\n", + "RETURN count(*)\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\"\"\"\n", + "MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:__Entity__)<-[:MENTIONS]-(d:Document)\n", + "WITH c, count(distinct d) AS rank\n", + "SET c.community_rank = rank;\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    LevelNumber of communities25th Percentile50th Percentile75th Percentile90th Percentile99th PercentileMax
    00281.02.03.7513.949.6661
    11221.02.08.5017.854.5564
    \n", + "
    " + ], + "text/plain": [ + " Level Number of communities 25th Percentile 50th Percentile \\\n", + "0 0 28 1.0 2.0 \n", + "1 1 22 1.0 2.0 \n", + "\n", + " 75th Percentile 90th Percentile 99th Percentile Max \n", + "0 3.75 13.9 49.66 61 \n", + "1 8.50 17.8 54.55 64 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "community_size = graph.query(\n", + " \"\"\"\n", + "MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(e:__Entity__)\n", + "WITH c, count(distinct e) AS entities\n", + "RETURN split(c.id, '-')[0] AS level, entities\n", + "\"\"\"\n", + ")\n", + "community_size_df = pd.DataFrame.from_records(community_size)\n", + "percentiles_data = []\n", + "for level in community_size_df[\"level\"].unique():\n", + " subset = community_size_df[community_size_df[\"level\"] == level][\"entities\"]\n", + " num_communities = len(subset)\n", + " percentiles = np.percentile(subset, [25, 50, 75, 90, 99])\n", + " percentiles_data.append(\n", + " [\n", + " level,\n", + " num_communities,\n", + " percentiles[0],\n", + " percentiles[1],\n", + " percentiles[2],\n", + " percentiles[3],\n", + " percentiles[4],\n", + " max(subset)\n", + " ]\n", + " )\n", + "\n", + "# Create a DataFrame with the percentiles\n", + "percentiles_df = pd.DataFrame(\n", + " percentiles_data,\n", + " columns=[\n", + " \"Level\",\n", + " \"Number of communities\",\n", + " \"25th Percentile\",\n", + " \"50th Percentile\",\n", + " \"75th Percentile\",\n", + " \"90th Percentile\",\n", + " \"99th Percentile\",\n", + " \"Max\"\n", + " ],\n", + ")\n", + "percentiles_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### community summaries" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "community_info = graph.query(\"\"\"\n", + "MATCH 
(c:`__Community__`)<-[:IN_COMMUNITY*]-(e:__Entity__)\n", + "WHERE c.level IN [0,1,4]\n", + "WITH c, collect(e ) AS nodes\n", + "WHERE size(nodes) > 1\n", + "CALL apoc.path.subgraphAll(nodes[0], {\n", + "\twhitelistNodes:nodes\n", + "})\n", + "YIELD relationships\n", + "RETURN c.id AS communityId,\n", + " [n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes,\n", + " [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id, description: r.description}] AS rels\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'communityId': '0-36',\n", + " 'nodes': [{'id': 'India',\n", + " 'description': 'Officially the Republic of India, located in South Asia.',\n", + " 'type': 'Country'},\n", + " {'id': 'Supreme Court Of India',\n", + " 'description': 'Head of the independent judiciary.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian National Congress',\n", + " 'description': 'Dominated Indian politics until 1977.',\n", + " 'type': 'Political party'},\n", + " {'id': 'Sindhu',\n", + " 'description': 'Name of the Indus River, from which the name India is derived.',\n", + " 'type': 'River'},\n", + " {'id': 'Food And Agriculture Organization Of The United Nations',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'Organization'},\n", + " {'id': '2013 Forest Survey Of India',\n", + " 'description': \"States India's forest cover increased to 69.8 million hectares by 2012.\",\n", + " 'type': 'Report'},\n", + " {'id': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibited the cutting of trees.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Promulgated in 1840 
targeting forests in Britain's Asian colonies.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Indian Forest Act Of 1865',\n", + " 'description': \"Established the government's claims over forests.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Forest Act Of 1878',\n", + " 'description': 'Gave the British government control over all wastelands, including forests.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Ocean',\n", + " 'description': 'Ocean bounded by India on the north.',\n", + " 'type': 'Ocean'},\n", + " {'id': 'Sri Lanka',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Maldives',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Myanmar',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Himalayan Mountain Range',\n", + " 'description': 'Defines the northern frontiers of India.',\n", + " 'type': 'Mountain range'},\n", + " {'id': 'China',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bhutan',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Nepal',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Pakistan',\n", + " 'description': 'Country sharing land border with India to the west.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bangladesh',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Ganges',\n", + " 'description': 'The longest river originating in India.',\n", + " 'type': 
'River'},\n", + " {'id': 'Western Ghats',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Eastern Himalayas',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Indo-Burma Hotspot',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Forests',\n", + " 'description': 'Covers about 24.6% of the total land area.',\n", + " 'type': 'Ecosystem'},\n", + " {'id': 'Indomalayan Realm',\n", + " 'description': 'Geographical realm that includes India.',\n", + " 'type': 'Realm'},\n", + " {'id': 'World', 'description': None, 'type': None},\n", + " {'id': 'Public_Sector', 'description': None, 'type': None},\n", + " {'id': 'Gdp', 'description': None, 'type': None},\n", + " {'id': 'Economic_Liberalisation', 'description': None, 'type': None},\n", + " {'id': 'Gdp_Growth', 'description': None, 'type': None},\n", + " {'id': 'Consumer_Market', 'description': None, 'type': None},\n", + " {'id': 'Wto', 'description': None, 'type': None},\n", + " {'id': 'Ease_Of_Business_Index', 'description': None, 'type': None},\n", + " {'id': 'Global_Competitiveness_Index', 'description': None, 'type': None},\n", + " {'id': 'Billionaires', 'description': None, 'type': None},\n", + " {'id': 'Welfare_State', 'description': None, 'type': None},\n", + " {'id': 'Socialist_State', 'description': None, 'type': None},\n", + " {'id': 'Social_Welfare_Spending', 'description': None, 'type': None},\n", + " {'id': 'Labour_Force', 'description': None, 'type': None},\n", + " {'id': 'Fdi', 'description': None, 'type': None},\n", + " {'id': 'Free_Trade_Agreements', 'description': None, 'type': None},\n", + " {'id': 'Service_Sector', 'description': None, 'type': None},\n", + " {'id': 'Stock_Exchanges', 'description': None, 'type': None},\n", + " {'id': 'Manufacturing', 
'description': None, 'type': None},\n", + " {'id': 'Population', 'description': None, 'type': None},\n", + " {'id': 'Unemployment', 'description': None, 'type': None},\n", + " {'id': 'Savings_Rate', 'description': None, 'type': None},\n", + " {'id': 'Arabian Sea',\n", + " 'description': 'Sea bounded by India on the northwest.',\n", + " 'type': 'Sea'},\n", + " {'id': 'Bay Of Bengal',\n", + " 'description': 'Bay bounded by India on the southeast.',\n", + " 'type': 'Bay'},\n", + " {'id': 'Africa',\n", + " 'description': 'Continent from which modern humans arrived on the Indian subcontinent.',\n", + " 'type': 'Continent'},\n", + " {'id': 'Indus Valley Civilisation',\n", + " 'description': 'Civilisation that evolved from settled life in the western margins of the Indus river basin.',\n", + " 'type': 'Civilization'},\n", + " {'id': 'Sanskrit',\n", + " 'description': 'Indo-European language that diffused into India from the northwest.',\n", + " 'type': 'Language'},\n", + " {'id': 'Rigveda',\n", + " 'description': 'Ancient hymns recording the dawning of Hinduism in India.',\n", + " 'type': 'Text'},\n", + " {'id': 'Hinduism',\n", + " 'description': 'Major religion in India, with roots in the Rigveda.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Buddhism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Jainism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Mughal Empire',\n", + " 'description': 'Empire that began in 1526, known for its architecture and relative peace.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Indian Republic',\n", + " 'description': 'India has been a federal republic since 1950.',\n", + " 'type': 'Government'}],\n", + " 'rels': [{'start': 'India',\n", + " 'description': None,\n", + " 'type': 'IS_PART_OF',\n", + " 'end': 'Indomalayan Realm'},\n", + " 
{'start': 'India',\n", + " 'description': 'About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.',\n", + " 'type': 'HAS',\n", + " 'end': 'Public_Sector'},\n", + " {'start': 'India',\n", + " 'description': \"One of the world's highest number of billionaires.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Billionaires'},\n", + " {'start': 'India',\n", + " 'description': \"World's second-largest labour force with 586 million workers.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Labour_Force'},\n", + " {'start': 'India',\n", + " 'description': 'Has free trade agreements with several nations and blocs.',\n", + " 'type': 'HAS',\n", + " 'end': 'Free_Trade_Agreements'},\n", + " {'start': 'India',\n", + " 'description': \"Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Stock_Exchanges'},\n", + " {'start': 'India',\n", + " 'description': \"Nearly 65% of India's population is rural.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Population'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'RELATES_TO',\n", + " 'end': 'Rigveda'},\n", + " {'start': 'India',\n", + " 'description': 'Member of the World Trade Organization since 1 January 1995.',\n", + " 'type': 'MEMBER_OF',\n", + " 'end': 'Wto'},\n", + " {'start': 'India',\n", + " 'description': 'High unemployment and rising income inequality.',\n", + " 'type': 'FACES',\n", + " 'end': 'Unemployment'},\n", + " {'start': 'India',\n", + " 'description': 'Adopted broad economic liberalisation in 1991.',\n", + " 'type': 'ADOPTED',\n", + " 'end': 'Economic_Liberalisation'},\n", + " {'start': 'India',\n", + " 'description': 'Often considered a welfare state.',\n", + " 'type': 'CONSIDERED_AS',\n", + " 'end': 'Welfare_State'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Sri Lanka'},\n", + " {'start': 'India', 'description': 
None, 'type': 'NEAR', 'end': 'Maldives'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Indian Ocean'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Himalayan Mountain Range'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'China'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Bhutan'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Nepal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Pakistan'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bangladesh'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Arabian Sea'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bay Of Bengal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SEPARATED_BY',\n", + " 'end': 'Sri Lanka'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Sindhu'},\n", + " {'start': 'India',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'ESTIMATES',\n", + " 'end': 'Food And Agriculture Organization Of The United Nations'},\n", + " {'start': 'India',\n", + " 'description': 'Makes up more than 50% of GDP.',\n", + " 'type': 'MAKES_UP',\n", + " 'end': 'Service_Sector'},\n", + " {'start': 'India',\n", + " 'description': 'Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'World'},\n", + " {'start': 'India',\n", + " 'description': 'Fourth-largest consumer market in the world.',\n", + " 'type': 'RANKS_AS',\n", + " 
'end': 'Consumer_Market'},\n", + " {'start': 'India',\n", + " 'description': \"World's sixth-largest manufacturer, representing 2.6% of global manufacturing output.\",\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Manufacturing'},\n", + " {'start': 'India',\n", + " 'description': 'Officially declared a socialist state as per the constitution.',\n", + " 'type': 'DECLARED_AS',\n", + " 'end': 'Socialist_State'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Hinduism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Buddhism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Jainism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_AS',\n", + " 'end': 'Indian Republic'},\n", + " {'start': 'India',\n", + " 'description': '136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Gdp'},\n", + " {'start': 'India',\n", + " 'description': '63rd on the Ease of Doing Business index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Ease_Of_Business_Index'},\n", + " {'start': 'India',\n", + " 'description': '40th on the Global Competitiveness Index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Global_Competitiveness_Index'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': 'Overall social welfare spending stood at 8.6% of GDP in 2021-22.',\n", + " 'type': 'SPENDING',\n", + " 'end': 'Social_Welfare_Spending'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Mughal Empire'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGINATES_IN',\n", + " 'end': 'Ganges'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 
'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Western Ghats'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Eastern Himalayas'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Indo-Burma Hotspot'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_ECOSYSTEM',\n", + " 'end': 'Forests'},\n", + " {'start': 'India',\n", + " 'description': '6% to 7% since the start of the 21st century.',\n", + " 'type': 'AVERAGE_ANNUAL_GROWTH',\n", + " 'end': 'Gdp_Growth'},\n", + " {'start': 'India',\n", + " 'description': 'Foreign direct investment in 2021-22 was $82 billion.',\n", + " 'type': 'FOREIGN_DIRECT_INVESTMENT',\n", + " 'end': 'Fdi'},\n", + " {'start': 'India',\n", + " 'description': 'Gross domestic savings rate stood at 29.3% of GDP in 2022.',\n", + " 'type': 'GROSS_DOMESTIC_SAVINGS_RATE',\n", + " 'end': 'Savings_Rate'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HUMAN_ORIGIN',\n", + " 'end': 'Africa'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_DEVELOPMENT',\n", + " 'end': 'Indus Valley Civilisation'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'LANGUAGE_DIFFUSION',\n", + " 'end': 'Sanskrit'},\n", + " {'start': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'DISCUSSES',\n", + " 'end': 'India'},\n", + " {'start': 'Indian Forest Act Of 1865',\n", + " 'description': \"Establishes government's claims over forests.\",\n", + " 'type': 'ESTABLISHES',\n", + " 'end': 'India'},\n", + " {'start': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Targets forests in Britain's Asian colonies.\",\n", + " 'type': 'TARGETS',\n", + " 'end': 'India'},\n", + " {'start': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibits the cutting of 
trees.',\n", + " 'type': 'PROHIBITS',\n", + " 'end': 'India'},\n", + " {'start': '2013 Forest Survey Of India',\n", + " 'description': 'Reports on forest cover increase.',\n", + " 'type': 'REPORTS_ON',\n", + " 'end': 'India'},\n", + " {'start': 'Supreme Court Of India',\n", + " 'description': None,\n", + " 'type': 'PART_OF_JUDICIARY',\n", + " 'end': 'India'},\n", + " {'start': 'Indian National Congress',\n", + " 'description': None,\n", + " 'type': 'HISTORICALLY_DOMINANT_PARTY',\n", + " 'end': 'India'},\n", + " {'start': 'Forest Act Of 1878',\n", + " 'description': 'Gives control over all wastelands, including forests.',\n", + " 'type': 'GIVES_CONTROL',\n", + " 'end': 'India'},\n", + " {'start': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'INSPECTOR_GENERAL_OF',\n", + " 'end': 'India'}]},\n", + " {'communityId': '0-34',\n", + " 'nodes': [{'id': 'Constitution Of India',\n", + " 'description': 'The supreme law of India, laying down the framework for government institutions and fundamental rights.',\n", + " 'type': 'Document'},\n", + " {'id': 'Parliament',\n", + " 'description': 'Cannot override the Constitution of India.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly',\n", + " 'description': 'Tasked with drafting the Constitution of India.',\n", + " 'type': 'Government body'},\n", + " {'id': 'M. N. 
Roy',\n", + " 'description': 'Proposed the framework for the Constitution of India.',\n", + " 'type': 'Person'},\n", + " {'id': 'Republic Day',\n", + " 'description': 'Celebrated on 26 January in India to honor the Constitution.',\n", + " 'type': 'Event'},\n", + " {'id': 'Old Parliament House',\n", + " 'description': 'Preserves the original 1950 Constitution in a nitrogen-filled case.',\n", + " 'type': 'Location'},\n", + " {'id': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Articles Of The Constitution',\n", + " 'description': 'Articles 5, 6, 7, 8, 9, 60, 324, 366, 367, 379, 380, 388, 391, 392, 393, and 394 came into force on 26 November 1949.',\n", + " 'type': 'Document'}],\n", + " 'rels': [{'start': 'Constitution Of India',\n", + " 'description': 'The Constitution is based on the proposal suggested by M. N. Roy.',\n", + " 'type': 'BASED_ON',\n", + " 'end': 'M. N. 
Roy'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'The original 1950 Constitution is preserved in a nitrogen-filled case.',\n", + " 'type': 'PRESERVED_IN',\n", + " 'end': 'Old Parliament House'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Celebrated on 26 January.',\n", + " 'type': 'CELEBRATED_ON',\n", + " 'end': 'Republic Day'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Parliament cannot override the Constitution.',\n", + " 'type': 'CANNOT_OVERRIDE',\n", + " 'end': 'Parliament'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Certain articles came into force on 26 November 1949.',\n", + " 'type': 'CAME_INTO_FORCE',\n", + " 'end': 'Articles Of The Constitution'},\n", + " {'start': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'REPLACED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'RESPONSIBLE_FOR',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'REPEALED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Constituent Assembly',\n", + " 'description': None,\n", + " 'type': 'DRAFTED_BY',\n", + " 'end': 'Constitution Of India'}]},\n", + " {'communityId': '0-25',\n", + " 'nodes': [{'id': 'President Of India',\n", + " 'description': 'The Supreme Commander of the Indian Armed Forces.',\n", + " 'type': 'Person'},\n", + " {'id': 'Nda',\n", + " 'description': 'A coalition of the BJP and its allies governing India since 2014.',\n", + " 'type': 'Political alliance'},\n", + " {'id': 'Government Of India',\n", + " 'description': 'Established a system of national parks and protected areas.',\n", + " 'type': 'Government'},\n", + " 
{'id': 'Narendra Modi',\n", + " 'description': 'Current Prime Minister of India since 26 May 2014.',\n", + " 'type': 'Person'},\n", + " {'id': 'New Delhi',\n", + " 'description': 'The seat of the Government of India.',\n", + " 'type': 'Location'},\n", + " {'id': 'Supreme Court',\n", + " 'description': 'The highest judicial forum and final court of appeal under the Constitution of India.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian Councils Act 1909',\n", + " 'description': 'Introduced elections to the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Government Of India Act 1919',\n", + " 'description': 'Expanded the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'National Parks',\n", + " 'description': 'Established in 1935 and expanded to nearly 1022 by 2023.',\n", + " 'type': 'Protected area'},\n", + " {'id': 'Wildlife Protection Act Of 1972',\n", + " 'description': 'Enacted for the protection of wildlife.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Project Tiger',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Elephant',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Dolphin',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'}],\n", + " 'rels': [{'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Tiger'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Elephant'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Dolphin'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 
'GOVERNED_BY',\n", + " 'end': 'Nda'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ENACTED',\n", + " 'end': 'Wildlife Protection Act Of 1972'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ESTABLISHED',\n", + " 'end': 'National Parks'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'LEAD_BY',\n", + " 'end': 'Narendra Modi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Supreme Court'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'SEATED_IN',\n", + " 'end': 'New Delhi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'HEAD_OF_STATE',\n", + " 'end': 'President Of India'},\n", + " {'start': 'Indian Councils Act 1909',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Government Of India Act 1919',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'}]},\n", + " {'communityId': '0-112',\n", + " 'nodes': [{'id': 'Prime Minister Of India',\n", + " 'description': 'Holds executive authority and responsibility for national security.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Armed Forces',\n", + " 'description': 'The military forces of the Republic of India.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Army',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Navy',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Air Force',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 
'Organization'},\n", + " {'id': 'Central Armed Police Forces',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Coast Guard',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Special Frontier Force',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Strategic Forces Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Andaman And Nicobar Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Integrated Defence Staff',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Ministry Of Defence',\n", + " 'description': 'Manages the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Global Firepower Index',\n", + " 'description': 'Ranks the military forces globally.',\n", + " 'type': 'Report'},\n", + " {'id': 'Armed Forces Flag Day',\n", + " 'description': 'Honors armed forces and military personnel annually.',\n", + " 'type': 'Event'},\n", + " {'id': 'Mauryan Empire',\n", + " 'description': 'An ancient Indian empire known for its powerful military.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Chola Empire',\n", + " 'description': 'Known for foreign trade and maritime activity.',\n", + " 'type': 'Empire'}],\n", + " 'rels': [{'start': 'Prime Minister Of India',\n", + " 'description': None,\n", + " 'type': 'EXECUTIVE_AUTHORITY',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Army'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Navy'},\n", + " 
{'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Air Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Central Armed Police Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Indian Coast Guard'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Special Frontier Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Strategic Forces Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Andaman And Nicobar Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Integrated Defence Staff'},\n", + " {'start': 'Ministry Of Defence',\n", + " 'description': None,\n", + " 'type': 'MANAGES',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Global Firepower Index',\n", + " 'description': None,\n", + " 'type': 'RANKS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Armed Forces Flag Day',\n", + " 'description': None,\n", + " 'type': 'HONORS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Mauryan Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Chola Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'}]},\n", + " {'communityId': '0-27',\n", + " 'nodes': [{'id': 'Rajya Sabha',\n", + " 'description': 'The mostly indirectly elected upper house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Lok Sabha',\n", + " 'description': 'The directly elected lower house of Parliament.',\n", + " 
'type': 'Government body'},\n", + " {'id': 'Union Council Of Ministers',\n", + " 'description': 'The executive decision-making committee of the government.',\n", + " 'type': 'Government body'}],\n", + " 'rels': [{'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Rajya Sabha'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Lok Sabha'}]},\n", + " {'communityId': '0-11',\n", + " 'nodes': [{'id': 'Republic Of India',\n", + " 'description': 'Has two principal official short names, India and Bharat.',\n", + " 'type': 'Country'},\n", + " {'id': 'Hindustan',\n", + " 'description': 'Commonly used name for the Republic of India.',\n", + " 'type': 'Name'}],\n", + " 'rels': [{'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Hindustan'}]},\n", + " {'communityId': '0-15',\n", + " 'nodes': [{'id': 'Bharat',\n", + " 'description': 'Another principal official short name of the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Dushyanta',\n", + " 'description': 'Father of Bharata, associated with the name Bhāratavarṣa.',\n", + " 'type': 'Person'},\n", + " {'id': 'Mahabharata',\n", + " 'description': 'Epic associated with the name Bharata.',\n", + " 'type': 'Literature'}],\n", + " 'rels': [{'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Dushyanta'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Mahabharata'}]},\n", + " {'communityId': '0-14',\n", + " 'nodes': [{'id': 'Bhāratavarṣa',\n", + " 'description': 'Term used in the first century AD, derived from the name of the Vedic community of Bharatas.',\n", + " 'type': 'Historical term'},\n", + " {'id': 'Bharatas',\n", + " 'description': 'Mentioned in the Rigveda as one of the principal kingdoms of the 
Aryavarta.',\n", + " 'type': 'Vedic community'}],\n", + " 'rels': [{'start': 'Bhāratavarṣa',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bharatas'}]},\n", + " {'communityId': '0-39',\n", + " 'nodes': [{'id': 'Thailand',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Indonesia',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Andaman And Nicobar Islands',\n", + " 'description': 'Islands of India sharing a maritime border with Thailand, Myanmar, and Indonesia.',\n", + " 'type': 'Island'}],\n", + " 'rels': [{'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Thailand'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Indonesia'}]},\n", + " {'communityId': '0-52',\n", + " 'nodes': [{'id': 'Constituent Assembly Of India',\n", + " 'description': 'Adopted the Constitution of India on 26 November 1949.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'},\n", + " {'id': 'V. P. Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'}],\n", + " 'rels': [{'start': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'V. P. 
Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'}]},\n", + " {'communityId': '0-78',\n", + " 'nodes': [{'id': 'Fauna Species',\n", + " 'description': 'India has an estimated 92,873 species of fauna.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Insects',\n", + " 'description': 'Major category with 63,423 recorded species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Mammals Count',\n", + " 'description': '423 mammals in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Count',\n", + " 'description': '1,233 birds in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Count',\n", + " 'description': '526 reptiles in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Amphibians Count',\n", + " 'description': '342 amphibians in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Fish Count',\n", + " 'description': '3,022 fish in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Mammals',\n", + " 'description': '12.6% of mammals are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Birds',\n", + " 'description': '4.5% of birds are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Reptiles',\n", + " 'description': '45.8% of reptiles are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Amphibians',\n", + " 'description': '55.8% of amphibians are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Large Animals',\n", + " 'description': 'Includes Indian elephant, Indian rhinoceros, and Gaur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Big Cats',\n", + " 'description': 'Includes tiger and lion.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Cat Family',\n", + " 'description': 'Includes Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded leopard.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Representative Species',\n", + " 'description': 'Includes 
blackbuck, nilgai, bharal, barasingha, Nilgiri tahr, and Nilgiri langur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Aquatic Mammals',\n", + " 'description': 'Includes dolphins, whales, porpoises, and dugong.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Species',\n", + " 'description': 'Includes gharial and saltwater crocodiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Species',\n", + " 'description': 'Includes peafowl, pheasants, geese, ducks, mynas, parakeets, pigeons, cranes, hornbills, and sunbirds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Bird Species',\n", + " 'description': 'Includes great Indian hornbill, great Indian bustard, nicobar pigeon, ruddy shelduck, Himalayan monal, and Himalayan quail.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Insects'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Mammals Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Amphibians Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fish Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Birds'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Reptiles'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + 
" 'end': 'Endemic Amphibians'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Large Animals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Big Cats'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Cat Family'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Representative Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Aquatic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Bird Species'}]},\n", + " {'communityId': '0-110',\n", + " 'nodes': [{'id': 'Plant Species',\n", + " 'description': 'About 29,015 species of plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flowering Plants Count',\n", + " 'description': '17,926 species of flowering plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Endemic Plant Species',\n", + " 'description': '6,842 species are endemic to India.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Algae', 'description': '7,244 species.', 'type': 'Flora'},\n", + " {'id': 'Bryophytes', 'description': '2,504 species.', 'type': 'Flora'},\n", + " {'id': 'Pteridophytes', 'description': '1,267 species.', 'type': 'Flora'},\n", + " {'id': 'Gymnosperms', 'description': '74 species.', 'type': 'Flora'},\n", + " {'id': 'Fungal Diversity',\n", + " 'description': 'Over 27,000 recorded species.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flora', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Flora',\n", + 
" 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Plant Species'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Flowering Plants Count'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Plant Species'},\n", + " {'start': 'Flora', 'description': None, 'type': 'INCLUDES', 'end': 'Algae'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Bryophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Pteridophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Gymnosperms'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fungal Diversity'}]},\n", + " {'communityId': '0-105',\n", + " 'nodes': [{'id': 'Threatened Species',\n", + " 'description': '172 IUCN-designated threatened species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Mammals',\n", + " 'description': '39 species of mammals.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Birds',\n", + " 'description': '72 species of birds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Reptiles',\n", + " 'description': '17 species of reptiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Amphibians',\n", + " 'description': '3 species of amphibians.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Fish',\n", + " 'description': '2 species of fish.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Mammals'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Birds'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 
'end': 'Threatened Reptiles'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Amphibians'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Fish'}]},\n", + " {'communityId': '0-125',\n", + " 'nodes': [{'id': 'Department Of Defence Production',\n", + " 'description': 'Responsible for indigenous production of equipment for the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Make In India Initiative',\n", + " 'description': 'Seeks to indigenise manufacturing and reduce dependence on imports for defence.',\n", + " 'type': 'Program'}],\n", + " 'rels': [{'start': 'Make In India Initiative',\n", + " 'description': None,\n", + " 'type': 'SUPPORTS',\n", + " 'end': 'Department Of Defence Production'}]},\n", + " {'communityId': '0-169',\n", + " 'nodes': [{'id': 'Maurya Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Gupta Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Ganges Basin', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Maurya Empire'},\n", + " {'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Gupta Empire'}]},\n", + " {'communityId': '0-170',\n", + " 'nodes': [{'id': 'Vijayanagara Empire',\n", + " 'description': 'Empire in south India that created a composite Hindu culture.',\n", + " 'type': 'Empire'},\n", + " {'id': 'South India', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'South India',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Vijayanagara Empire'}]},\n", + " {'communityId': '0-171',\n", + " 'nodes': [{'id': 'Sikhism',\n", + " 'description': 
'Religion that emerged in the Punjab, rejecting institutionalised religion.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Punjab', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Punjab',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Sikhism'}]},\n", + " {'communityId': '0-167',\n", + " 'nodes': [{'id': 'British East India Company',\n", + " 'description': 'Company that expanded rule in India, turning it into a colonial economy.',\n", + " 'type': 'Company'},\n", + " {'id': 'British Crown',\n", + " 'description': 'Began rule in India in 1858.',\n", + " 'type': 'Government'},\n", + " {'id': 'Indian Independence',\n", + " 'description': 'Event in 1947 when India was partitioned into two independent dominions.',\n", + " 'type': 'Event'}],\n", + " 'rels': [{'start': 'British East India Company',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'British Crown'},\n", + " {'start': 'British Crown',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Indian Independence'}]},\n", + " {'communityId': '1-23',\n", + " 'nodes': [{'id': 'India',\n", + " 'description': 'Officially the Republic of India, located in South Asia.',\n", + " 'type': 'Country'},\n", + " {'id': 'Supreme Court Of India',\n", + " 'description': 'Head of the independent judiciary.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian National Congress',\n", + " 'description': 'Dominated Indian politics until 1977.',\n", + " 'type': 'Political party'},\n", + " {'id': 'Sindhu',\n", + " 'description': 'Name of the Indus River, from which the name India is derived.',\n", + " 'type': 'River'},\n", + " {'id': 'Food And Agriculture Organization Of The United Nations',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'Organization'},\n", + " {'id': '2013 Forest Survey Of India',\n", + " 'description': \"States India's forest cover increased to 69.8 million hectares by 2012.\",\n", + " 
'type': 'Report'},\n", + " {'id': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibited the cutting of trees.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Promulgated in 1840 targeting forests in Britain's Asian colonies.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Indian Forest Act Of 1865',\n", + " 'description': \"Established the government's claims over forests.\",\n", + " 'type': 'Legislation'},\n", + " {'id': 'Forest Act Of 1878',\n", + " 'description': 'Gave the British government control over all wastelands, including forests.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Ocean',\n", + " 'description': 'Ocean bounded by India on the north.',\n", + " 'type': 'Ocean'},\n", + " {'id': 'Sri Lanka',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Maldives',\n", + " 'description': 'Country in the vicinity of India in the Indian Ocean.',\n", + " 'type': 'Country'},\n", + " {'id': 'Myanmar',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Himalayan Mountain Range',\n", + " 'description': 'Defines the northern frontiers of India.',\n", + " 'type': 'Mountain range'},\n", + " {'id': 'China',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bhutan',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 'Country'},\n", + " {'id': 'Nepal',\n", + " 'description': 'Country sharing land border with India to the north.',\n", + " 'type': 
'Country'},\n", + " {'id': 'Pakistan',\n", + " 'description': 'Country sharing land border with India to the west.',\n", + " 'type': 'Country'},\n", + " {'id': 'Bangladesh',\n", + " 'description': 'Country sharing land border with India to the east.',\n", + " 'type': 'Country'},\n", + " {'id': 'Ganges',\n", + " 'description': 'The longest river originating in India.',\n", + " 'type': 'River'},\n", + " {'id': 'Western Ghats',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Eastern Himalayas',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Indo-Burma Hotspot',\n", + " 'description': 'One of the three biodiversity hotspots in India.',\n", + " 'type': 'Biodiversity hotspot'},\n", + " {'id': 'Forests',\n", + " 'description': 'Covers about 24.6% of the total land area.',\n", + " 'type': 'Ecosystem'},\n", + " {'id': 'Indomalayan Realm',\n", + " 'description': 'Geographical realm that includes India.',\n", + " 'type': 'Realm'},\n", + " {'id': 'World', 'description': None, 'type': None},\n", + " {'id': 'Public_Sector', 'description': None, 'type': None},\n", + " {'id': 'Gdp', 'description': None, 'type': None},\n", + " {'id': 'Economic_Liberalisation', 'description': None, 'type': None},\n", + " {'id': 'Gdp_Growth', 'description': None, 'type': None},\n", + " {'id': 'Consumer_Market', 'description': None, 'type': None},\n", + " {'id': 'Wto', 'description': None, 'type': None},\n", + " {'id': 'Ease_Of_Business_Index', 'description': None, 'type': None},\n", + " {'id': 'Global_Competitiveness_Index', 'description': None, 'type': None},\n", + " {'id': 'Billionaires', 'description': None, 'type': None},\n", + " {'id': 'Welfare_State', 'description': None, 'type': None},\n", + " {'id': 'Socialist_State', 'description': None, 'type': None},\n", + " {'id': 'Social_Welfare_Spending', 'description': None, 'type': 
None},\n", + " {'id': 'Labour_Force', 'description': None, 'type': None},\n", + " {'id': 'Fdi', 'description': None, 'type': None},\n", + " {'id': 'Free_Trade_Agreements', 'description': None, 'type': None},\n", + " {'id': 'Service_Sector', 'description': None, 'type': None},\n", + " {'id': 'Stock_Exchanges', 'description': None, 'type': None},\n", + " {'id': 'Manufacturing', 'description': None, 'type': None},\n", + " {'id': 'Population', 'description': None, 'type': None},\n", + " {'id': 'Unemployment', 'description': None, 'type': None},\n", + " {'id': 'Savings_Rate', 'description': None, 'type': None},\n", + " {'id': 'Arabian Sea',\n", + " 'description': 'Sea bounded by India on the northwest.',\n", + " 'type': 'Sea'},\n", + " {'id': 'Bay Of Bengal',\n", + " 'description': 'Bay bounded by India on the southeast.',\n", + " 'type': 'Bay'},\n", + " {'id': 'Africa',\n", + " 'description': 'Continent from which modern humans arrived on the Indian subcontinent.',\n", + " 'type': 'Continent'},\n", + " {'id': 'Indus Valley Civilisation',\n", + " 'description': 'Civilisation that evolved from settled life in the western margins of the Indus river basin.',\n", + " 'type': 'Civilization'},\n", + " {'id': 'Sanskrit',\n", + " 'description': 'Indo-European language that diffused into India from the northwest.',\n", + " 'type': 'Language'},\n", + " {'id': 'Rigveda',\n", + " 'description': 'Ancient hymns recording the dawning of Hinduism in India.',\n", + " 'type': 'Text'},\n", + " {'id': 'Hinduism',\n", + " 'description': 'Major religion in India, with roots in the Rigveda.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Buddhism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Jainism',\n", + " 'description': 'Religion that arose in India, proclaiming social orders unlinked to heredity.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Mughal Empire',\n", + " 'description': 
'Empire that began in 1526, known for its architecture and relative peace.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Indian Republic',\n", + " 'description': 'India has been a federal republic since 1950.',\n", + " 'type': 'Government'},\n", + " {'id': 'Thailand',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Indonesia',\n", + " 'description': \"Shares maritime borders with India's Andaman and Nicobar Islands.\",\n", + " 'type': 'Country'},\n", + " {'id': 'Andaman And Nicobar Islands',\n", + " 'description': 'Islands of India sharing a maritime border with Thailand, Myanmar, and Indonesia.',\n", + " 'type': 'Island'}],\n", + " 'rels': [{'start': 'India',\n", + " 'description': None,\n", + " 'type': 'IS_PART_OF',\n", + " 'end': 'Indomalayan Realm'},\n", + " {'start': 'India',\n", + " 'description': 'About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.',\n", + " 'type': 'HAS',\n", + " 'end': 'Public_Sector'},\n", + " {'start': 'India',\n", + " 'description': \"One of the world's highest number of billionaires.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Billionaires'},\n", + " {'start': 'India',\n", + " 'description': \"World's second-largest labour force with 586 million workers.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Labour_Force'},\n", + " {'start': 'India',\n", + " 'description': 'Has free trade agreements with several nations and blocs.',\n", + " 'type': 'HAS',\n", + " 'end': 'Free_Trade_Agreements'},\n", + " {'start': 'India',\n", + " 'description': \"Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Stock_Exchanges'},\n", + " {'start': 'India',\n", + " 'description': \"Nearly 65% of India's population is rural.\",\n", + " 'type': 'HAS',\n", + " 'end': 'Population'},\n", + " {'start': 
'India',\n", + " 'description': None,\n", + " 'type': 'RELATES_TO',\n", + " 'end': 'Rigveda'},\n", + " {'start': 'India',\n", + " 'description': 'Member of the World Trade Organization since 1 January 1995.',\n", + " 'type': 'MEMBER_OF',\n", + " 'end': 'Wto'},\n", + " {'start': 'India',\n", + " 'description': 'High unemployment and rising income inequality.',\n", + " 'type': 'FACES',\n", + " 'end': 'Unemployment'},\n", + " {'start': 'India',\n", + " 'description': 'Adopted broad economic liberalisation in 1991.',\n", + " 'type': 'ADOPTED',\n", + " 'end': 'Economic_Liberalisation'},\n", + " {'start': 'India',\n", + " 'description': 'Often considered a welfare state.',\n", + " 'type': 'CONSIDERED_AS',\n", + " 'end': 'Welfare_State'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Sri Lanka'},\n", + " {'start': 'India', 'description': None, 'type': 'NEAR', 'end': 'Maldives'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Indian Ocean'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Himalayan Mountain Range'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'China'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Bhutan'},\n", + " {'start': 'India', 'description': None, 'type': 'BORDERS', 'end': 'Nepal'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Pakistan'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bangladesh'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Arabian Sea'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'BORDERS',\n", + " 'end': 'Bay Of Bengal'},\n", + " {'start': 
'India',\n", + " 'description': None,\n", + " 'type': 'SEPARATED_BY',\n", + " 'end': 'Sri Lanka'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Sindhu'},\n", + " {'start': 'India',\n", + " 'description': \"Estimates India's forest cover.\",\n", + " 'type': 'ESTIMATES',\n", + " 'end': 'Food And Agriculture Organization Of The United Nations'},\n", + " {'start': 'India',\n", + " 'description': 'Makes up more than 50% of GDP.',\n", + " 'type': 'MAKES_UP',\n", + " 'end': 'Service_Sector'},\n", + " {'start': 'India',\n", + " 'description': 'Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'World'},\n", + " {'start': 'India',\n", + " 'description': 'Fourth-largest consumer market in the world.',\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Consumer_Market'},\n", + " {'start': 'India',\n", + " 'description': \"World's sixth-largest manufacturer, representing 2.6% of global manufacturing output.\",\n", + " 'type': 'RANKS_AS',\n", + " 'end': 'Manufacturing'},\n", + " {'start': 'India',\n", + " 'description': 'Officially declared a socialist state as per the constitution.',\n", + " 'type': 'DECLARED_AS',\n", + " 'end': 'Socialist_State'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Hinduism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Buddhism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Jainism'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_AS',\n", + " 'end': 'Indian Republic'},\n", + " {'start': 'India',\n", + " 'description': '136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Gdp'},\n", + " {'start': 'India',\n", + " 'description': '63rd on the Ease of 
Doing Business index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Ease_Of_Business_Index'},\n", + " {'start': 'India',\n", + " 'description': '40th on the Global Competitiveness Index.',\n", + " 'type': 'RANKS',\n", + " 'end': 'Global_Competitiveness_Index'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Thailand'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'SHARES_BORDERS_WITH',\n", + " 'end': 'Indonesia'},\n", + " {'start': 'India',\n", + " 'description': 'Overall social welfare spending stood at 8.6% of GDP in 2021-22.',\n", + " 'type': 'SPENDING',\n", + " 'end': 'Social_Welfare_Spending'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Mughal Empire'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'ORIGINATES_IN',\n", + " 'end': 'Ganges'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Western Ghats'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Eastern Himalayas'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_BIODIVERSITY_HOTSPOT',\n", + " 'end': 'Indo-Burma Hotspot'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HAS_ECOSYSTEM',\n", + " 'end': 'Forests'},\n", + " {'start': 'India',\n", + " 'description': '6% to 7% since the start of the 21st century.',\n", + " 'type': 'AVERAGE_ANNUAL_GROWTH',\n", + " 'end': 'Gdp_Growth'},\n", + " {'start': 'India',\n", + " 'description': 'Foreign direct investment in 2021-22 was $82 billion.',\n", + " 'type': 'FOREIGN_DIRECT_INVESTMENT',\n", + " 'end': 'Fdi'},\n", + " {'start': 'India',\n", + " 'description': 'Gross domestic savings 
rate stood at 29.3% of GDP in 2022.',\n", + " 'type': 'GROSS_DOMESTIC_SAVINGS_RATE',\n", + " 'end': 'Savings_Rate'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HUMAN_ORIGIN',\n", + " 'end': 'Africa'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_DEVELOPMENT',\n", + " 'end': 'Indus Valley Civilisation'},\n", + " {'start': 'India',\n", + " 'description': None,\n", + " 'type': 'LANGUAGE_DIFFUSION',\n", + " 'end': 'Sanskrit'},\n", + " {'start': 'Kautalyas Arthashastra',\n", + " 'description': 'Discusses the need for forest administration.',\n", + " 'type': 'DISCUSSES',\n", + " 'end': 'India'},\n", + " {'start': 'Indian Forest Act Of 1865',\n", + " 'description': \"Establishes government's claims over forests.\",\n", + " 'type': 'ESTABLISHES',\n", + " 'end': 'India'},\n", + " {'start': 'Crown Land (Encroachment) Ordinance',\n", + " 'description': \"Targets forests in Britain's Asian colonies.\",\n", + " 'type': 'TARGETS',\n", + " 'end': 'India'},\n", + " {'start': 'Yajnavalkya Smriti',\n", + " 'description': 'Prohibits the cutting of trees.',\n", + " 'type': 'PROHIBITS',\n", + " 'end': 'India'},\n", + " {'start': '2013 Forest Survey Of India',\n", + " 'description': 'Reports on forest cover increase.',\n", + " 'type': 'REPORTS_ON',\n", + " 'end': 'India'},\n", + " {'start': 'Supreme Court Of India',\n", + " 'description': None,\n", + " 'type': 'PART_OF_JUDICIARY',\n", + " 'end': 'India'},\n", + " {'start': 'Indian National Congress',\n", + " 'description': None,\n", + " 'type': 'HISTORICALLY_DOMINANT_PARTY',\n", + " 'end': 'India'},\n", + " {'start': 'Forest Act Of 1878',\n", + " 'description': 'Gives control over all wastelands, including forests.',\n", + " 'type': 'GIVES_CONTROL',\n", + " 'end': 'India'},\n", + " {'start': 'Sir Dietrich Brandis',\n", + " 'description': 'Inspector General of Forests in India from 1864 to 1883.',\n", + " 'type': 'INSPECTOR_GENERAL_OF',\n", + " 'end': 'India'},\n", + " 
{'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Myanmar'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Thailand'},\n", + " {'start': 'Andaman And Nicobar Islands',\n", + " 'description': None,\n", + " 'type': 'MARITIME_BORDER',\n", + " 'end': 'Indonesia'}]},\n", + " {'communityId': '1-7',\n", + " 'nodes': [{'id': 'Constitution Of India',\n", + " 'description': 'The supreme law of India, laying down the framework for government institutions and fundamental rights.',\n", + " 'type': 'Document'},\n", + " {'id': 'Parliament',\n", + " 'description': 'Cannot override the Constitution of India.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly',\n", + " 'description': 'Tasked with drafting the Constitution of India.',\n", + " 'type': 'Government body'},\n", + " {'id': 'M. N. 
Roy',\n", + " 'description': 'Proposed the framework for the Constitution of India.',\n", + " 'type': 'Person'},\n", + " {'id': 'Republic Day',\n", + " 'description': 'Celebrated on 26 January in India to honor the Constitution.',\n", + " 'type': 'Event'},\n", + " {'id': 'Old Parliament House',\n", + " 'description': 'Preserves the original 1950 Constitution in a nitrogen-filled case.',\n", + " 'type': 'Location'},\n", + " {'id': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'Document'},\n", + " {'id': 'Articles Of The Constitution',\n", + " 'description': 'Articles 5, 6, 7, 8, 9, 60, 324, 366, 367, 379, 380, 388, 391, 392, 393, and 394 came into force on 26 November 1949.',\n", + " 'type': 'Document'},\n", + " {'id': 'Constituent Assembly Of India',\n", + " 'description': 'Adopted the Constitution of India on 26 November 1949.',\n", + " 'type': 'Institution'},\n", + " {'id': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'},\n", + " {'id': 'V. P. Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration with India.',\n", + " 'type': 'Person'}],\n", + " 'rels': [{'start': 'Constitution Of India',\n", + " 'description': 'The Constitution is based on the proposal suggested by M. N. Roy.',\n", + " 'type': 'BASED_ON',\n", + " 'end': 'M. N. 
Roy'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Adopted by the Constituent Assembly on 26 November 1949.',\n", + " 'type': 'ADOPTED_BY',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'The original 1950 Constitution is preserved in a nitrogen-filled case.',\n", + " 'type': 'PRESERVED_IN',\n", + " 'end': 'Old Parliament House'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Celebrated on 26 January.',\n", + " 'type': 'CELEBRATED_ON',\n", + " 'end': 'Republic Day'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Parliament cannot override the Constitution.',\n", + " 'type': 'CANNOT_OVERRIDE',\n", + " 'end': 'Parliament'},\n", + " {'start': 'Constitution Of India',\n", + " 'description': 'Certain articles came into force on 26 November 1949.',\n", + " 'type': 'CAME_INTO_FORCE',\n", + " 'end': 'Articles Of The Constitution'},\n", + " {'start': 'Government Of India Act 1935',\n", + " 'description': 'Replaced by the Constitution of India.',\n", + " 'type': 'REPLACED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'British Government',\n", + " 'description': 'Responsible for the external security of India during its dominion status.',\n", + " 'type': 'RESPONSIBLE_FOR',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Indian Independence Act 1947',\n", + " 'description': 'Repealed by the Constitution of India.',\n", + " 'type': 'REPEALED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Constituent Assembly',\n", + " 'description': None,\n", + " 'type': 'DRAFTED_BY',\n", + " 'end': 'Constitution Of India'},\n", + " {'start': 'Sardar Patel',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'},\n", + " {'start': 'V. P. 
Menon',\n", + " 'description': 'Convinced princely states to sign articles of integration.',\n", + " 'type': 'CONVINCED',\n", + " 'end': 'Constituent Assembly Of India'}]},\n", + " {'communityId': '1-4',\n", + " 'nodes': [{'id': 'President Of India',\n", + " 'description': 'The Supreme Commander of the Indian Armed Forces.',\n", + " 'type': 'Person'},\n", + " {'id': 'Nda',\n", + " 'description': 'A coalition of the BJP and its allies governing India since 2014.',\n", + " 'type': 'Political alliance'},\n", + " {'id': 'Government Of India',\n", + " 'description': 'Established a system of national parks and protected areas.',\n", + " 'type': 'Government'},\n", + " {'id': 'Narendra Modi',\n", + " 'description': 'Current Prime Minister of India since 26 May 2014.',\n", + " 'type': 'Person'},\n", + " {'id': 'New Delhi',\n", + " 'description': 'The seat of the Government of India.',\n", + " 'type': 'Location'},\n", + " {'id': 'Supreme Court',\n", + " 'description': 'The highest judicial forum and final court of appeal under the Constitution of India.',\n", + " 'type': 'Judiciary'},\n", + " {'id': 'Indian Councils Act 1909',\n", + " 'description': 'Introduced elections to the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Government Of India Act 1919',\n", + " 'description': 'Expanded the Imperial Legislative Council.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'National Parks',\n", + " 'description': 'Established in 1935 and expanded to nearly 1022 by 2023.',\n", + " 'type': 'Protected area'},\n", + " {'id': 'Wildlife Protection Act Of 1972',\n", + " 'description': 'Enacted for the protection of wildlife.',\n", + " 'type': 'Legislation'},\n", + " {'id': 'Project Tiger',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Project Elephant',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation 
project'},\n", + " {'id': 'Project Dolphin',\n", + " 'description': 'Special project for the protection of critical species.',\n", + " 'type': 'Conservation project'},\n", + " {'id': 'Rajya Sabha',\n", + " 'description': 'The mostly indirectly elected upper house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Lok Sabha',\n", + " 'description': 'The directly elected lower house of Parliament.',\n", + " 'type': 'Government body'},\n", + " {'id': 'Union Council Of Ministers',\n", + " 'description': 'The executive decision-making committee of the government.',\n", + " 'type': 'Government body'}],\n", + " 'rels': [{'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Tiger'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Elephant'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'INITIATED',\n", + " 'end': 'Project Dolphin'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'GOVERNED_BY',\n", + " 'end': 'Nda'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'COMPOSED_OF',\n", + " 'end': 'Union Council Of Ministers'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ENACTED',\n", + " 'end': 'Wildlife Protection Act Of 1972'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'ESTABLISHED',\n", + " 'end': 'National Parks'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'LEAD_BY',\n", + " 'end': 'Narendra Modi'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Supreme Court'},\n", + " {'start': 'Government Of India',\n", + " 'description': None,\n", + " 'type': 'SEATED_IN',\n", + " 'end': 'New Delhi'},\n", + " {'start': 
'Government Of India',\n", + " 'description': None,\n", + " 'type': 'HEAD_OF_STATE',\n", + " 'end': 'President Of India'},\n", + " {'start': 'Indian Councils Act 1909',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Government Of India Act 1919',\n", + " 'description': None,\n", + " 'type': 'INFLUENCED',\n", + " 'end': 'Government Of India'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Rajya Sabha'},\n", + " {'start': 'Union Council Of Ministers',\n", + " 'description': None,\n", + " 'type': 'RESPONSIBLE_TO',\n", + " 'end': 'Lok Sabha'}]},\n", + " {'communityId': '1-22',\n", + " 'nodes': [{'id': 'Prime Minister Of India',\n", + " 'description': 'Holds executive authority and responsibility for national security.',\n", + " 'type': 'Person'},\n", + " {'id': 'Indian Armed Forces',\n", + " 'description': 'The military forces of the Republic of India.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Army',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Navy',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Air Force',\n", + " 'description': 'One of the three professional uniformed services of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Central Armed Police Forces',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Indian Coast Guard',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Special Frontier Force',\n", + " 'description': 'Supports the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Strategic Forces Command',\n", 
+ " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Andaman And Nicobar Command',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Integrated Defence Staff',\n", + " 'description': 'An inter-service command of the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Ministry Of Defence',\n", + " 'description': 'Manages the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Global Firepower Index',\n", + " 'description': 'Ranks the military forces globally.',\n", + " 'type': 'Report'},\n", + " {'id': 'Armed Forces Flag Day',\n", + " 'description': 'Honors armed forces and military personnel annually.',\n", + " 'type': 'Event'},\n", + " {'id': 'Mauryan Empire',\n", + " 'description': 'An ancient Indian empire known for its powerful military.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Chola Empire',\n", + " 'description': 'Known for foreign trade and maritime activity.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Department Of Defence Production',\n", + " 'description': 'Responsible for indigenous production of equipment for the Indian Armed Forces.',\n", + " 'type': 'Organization'},\n", + " {'id': 'Make In India Initiative',\n", + " 'description': 'Seeks to indigenise manufacturing and reduce dependence on imports for defence.',\n", + " 'type': 'Program'}],\n", + " 'rels': [{'start': 'Prime Minister Of India',\n", + " 'description': None,\n", + " 'type': 'EXECUTIVE_AUTHORITY',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Army'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Indian Navy'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 
'end': 'Indian Air Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Central Armed Police Forces'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Indian Coast Guard'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Special Frontier Force'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Strategic Forces Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Andaman And Nicobar Command'},\n", + " {'start': 'Indian Armed Forces',\n", + " 'description': None,\n", + " 'type': 'SUPPORTED_BY',\n", + " 'end': 'Integrated Defence Staff'},\n", + " {'start': 'Ministry Of Defence',\n", + " 'description': None,\n", + " 'type': 'MANAGES',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Global Firepower Index',\n", + " 'description': None,\n", + " 'type': 'RANKS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Armed Forces Flag Day',\n", + " 'description': None,\n", + " 'type': 'HONORS',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Department Of Defence Production',\n", + " 'description': None,\n", + " 'type': 'PRODUCES_EQUIPMENT_FOR',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Mauryan Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Chola Empire',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_REFERENCE',\n", + " 'end': 'Indian Armed Forces'},\n", + " {'start': 'Make In India Initiative',\n", + " 'description': None,\n", + " 'type': 'SUPPORTS',\n", + " 'end': 'Department Of Defence Production'}]},\n", + " {'communityId': '1-2',\n", + " 'nodes': [{'id': 'Republic Of India',\n", 
+ " 'description': 'Has two principal official short names, India and Bharat.',\n", + " 'type': 'Country'},\n", + " {'id': 'Hindustan',\n", + " 'description': 'Commonly used name for the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Bharat',\n", + " 'description': 'Another principal official short name of the Republic of India.',\n", + " 'type': 'Name'},\n", + " {'id': 'Dushyanta',\n", + " 'description': 'Father of Bharata, associated with the name Bhāratavarṣa.',\n", + " 'type': 'Person'},\n", + " {'id': 'Mahabharata',\n", + " 'description': 'Epic associated with the name Bharata.',\n", + " 'type': 'Literature'},\n", + " {'id': 'Bhāratavarṣa',\n", + " 'description': 'Term used in the first century AD, derived from the name of the Vedic community of Bharatas.',\n", + " 'type': 'Historical term'},\n", + " {'id': 'Bharatas',\n", + " 'description': 'Mentioned in the Rigveda as one of the principal kingdoms of the Aryavarta.',\n", + " 'type': 'Vedic community'}],\n", + " 'rels': [{'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Bharat'},\n", + " {'start': 'Republic Of India',\n", + " 'description': None,\n", + " 'type': 'HAS_NAME',\n", + " 'end': 'Hindustan'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Dushyanta'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'ASSOCIATED_WITH',\n", + " 'end': 'Mahabharata'},\n", + " {'start': 'Bharat',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bhāratavarṣa'},\n", + " {'start': 'Bhāratavarṣa',\n", + " 'description': None,\n", + " 'type': 'DERIVED_FROM',\n", + " 'end': 'Bharatas'}]},\n", + " {'communityId': '1-18',\n", + " 'nodes': [{'id': 'Fauna Species',\n", + " 'description': 'India has an estimated 92,873 species of fauna.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Insects',\n", + " 'description': 'Major category with 63,423 recorded 
species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Mammals Count',\n", + " 'description': '423 mammals in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Count',\n", + " 'description': '1,233 birds in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Count',\n", + " 'description': '526 reptiles in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Amphibians Count',\n", + " 'description': '342 amphibians in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Fish Count',\n", + " 'description': '3,022 fish in India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Mammals',\n", + " 'description': '12.6% of mammals are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Birds',\n", + " 'description': '4.5% of birds are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Reptiles',\n", + " 'description': '45.8% of reptiles are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Amphibians',\n", + " 'description': '55.8% of amphibians are endemic to India.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Large Animals',\n", + " 'description': 'Includes Indian elephant, Indian rhinoceros, and Gaur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Big Cats',\n", + " 'description': 'Includes tiger and lion.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Cat Family',\n", + " 'description': 'Includes Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded leopard.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Representative Species',\n", + " 'description': 'Includes blackbuck, nilgai, bharal, barasingha, Nilgiri tahr, and Nilgiri langur.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Aquatic Mammals',\n", + " 'description': 'Includes dolphins, whales, porpoises, and dugong.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Reptiles Species',\n", + " 'description': 'Includes gharial and saltwater crocodiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Birds Species',\n", + " 'description': 'Includes peafowl, 
pheasants, geese, ducks, mynas, parakeets, pigeons, cranes, hornbills, and sunbirds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Endemic Bird Species',\n", + " 'description': 'Includes great Indian hornbill, great Indian bustard, nicobar pigeon, ruddy shelduck, Himalayan monal, and Himalayan quail.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Insects'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Mammals Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Amphibians Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fish Count'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Birds'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Reptiles'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Amphibians'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Large Animals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Big Cats'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Cat Family'},\n", + " {'start': 'Fauna Species',\n", + 
" 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Representative Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Aquatic Mammals'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Reptiles Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Birds Species'},\n", + " {'start': 'Fauna Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Bird Species'}]},\n", + " {'communityId': '1-20',\n", + " 'nodes': [{'id': 'Plant Species',\n", + " 'description': 'About 29,015 species of plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flowering Plants Count',\n", + " 'description': '17,926 species of flowering plants.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Endemic Plant Species',\n", + " 'description': '6,842 species are endemic to India.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Algae', 'description': '7,244 species.', 'type': 'Flora'},\n", + " {'id': 'Bryophytes', 'description': '2,504 species.', 'type': 'Flora'},\n", + " {'id': 'Pteridophytes', 'description': '1,267 species.', 'type': 'Flora'},\n", + " {'id': 'Gymnosperms', 'description': '74 species.', 'type': 'Flora'},\n", + " {'id': 'Fungal Diversity',\n", + " 'description': 'Over 27,000 recorded species.',\n", + " 'type': 'Flora'},\n", + " {'id': 'Flora', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Plant Species'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Flowering Plants Count'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Endemic Plant Species'},\n", + " {'start': 'Flora', 'description': None, 'type': 'INCLUDES', 'end': 'Algae'},\n", + " {'start': 
'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Bryophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Pteridophytes'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Gymnosperms'},\n", + " {'start': 'Flora',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Fungal Diversity'}]},\n", + " {'communityId': '1-19',\n", + " 'nodes': [{'id': 'Threatened Species',\n", + " 'description': '172 IUCN-designated threatened species.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Mammals',\n", + " 'description': '39 species of mammals.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Birds',\n", + " 'description': '72 species of birds.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Reptiles',\n", + " 'description': '17 species of reptiles.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Amphibians',\n", + " 'description': '3 species of amphibians.',\n", + " 'type': 'Fauna'},\n", + " {'id': 'Threatened Fish',\n", + " 'description': '2 species of fish.',\n", + " 'type': 'Fauna'}],\n", + " 'rels': [{'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Mammals'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Birds'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Reptiles'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Amphibians'},\n", + " {'start': 'Threatened Species',\n", + " 'description': None,\n", + " 'type': 'INCLUDES',\n", + " 'end': 'Threatened Fish'}]},\n", + " {'communityId': '1-25',\n", + " 'nodes': [{'id': 'Maurya Empire',\n", + " 'description': 'Ancient empire based in the Ganges 
Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Gupta Empire',\n", + " 'description': 'Ancient empire based in the Ganges Basin.',\n", + " 'type': 'Empire'},\n", + " {'id': 'Ganges Basin', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Maurya Empire'},\n", + " {'start': 'Ganges Basin',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Gupta Empire'}]},\n", + " {'communityId': '1-26',\n", + " 'nodes': [{'id': 'Vijayanagara Empire',\n", + " 'description': 'Empire in south India that created a composite Hindu culture.',\n", + " 'type': 'Empire'},\n", + " {'id': 'South India', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'South India',\n", + " 'description': None,\n", + " 'type': 'BASED_IN',\n", + " 'end': 'Vijayanagara Empire'}]},\n", + " {'communityId': '1-27',\n", + " 'nodes': [{'id': 'Sikhism',\n", + " 'description': 'Religion that emerged in the Punjab, rejecting institutionalised religion.',\n", + " 'type': 'Religion'},\n", + " {'id': 'Punjab', 'description': None, 'type': None}],\n", + " 'rels': [{'start': 'Punjab',\n", + " 'description': None,\n", + " 'type': 'ORIGIN_OF',\n", + " 'end': 'Sikhism'}]},\n", + " {'communityId': '1-24',\n", + " 'nodes': [{'id': 'British East India Company',\n", + " 'description': 'Company that expanded rule in India, turning it into a colonial economy.',\n", + " 'type': 'Company'},\n", + " {'id': 'British Crown',\n", + " 'description': 'Began rule in India in 1858.',\n", + " 'type': 'Government'},\n", + " {'id': 'Indian Independence',\n", + " 'description': 'Event in 1947 when India was partitioned into two independent dominions.',\n", + " 'type': 'Event'}],\n", + " 'rels': [{'start': 'British East India Company',\n", + " 'description': None,\n", + " 'type': 'HISTORICAL_EVENT',\n", + " 'end': 'British Crown'},\n", + " {'start': 'British Crown',\n", + " 'description': None,\n", + " 
'type': 'HISTORICAL_EVENT',\n", + " 'end': 'Indian Independence'}]}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "community_info" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "community_template = \"\"\"Based on the provided nodes and relationships that belong to the same graph community,\n", + "generate a natural language summary of the provided information:\n", + "{community_info}\n", + "\n", + "Summary:\"\"\" \n", + "model = \"openai-gpt-4o\"\n", + "llm, model_name = get_llm(model)\n", + "community_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Given an input triples, generate the information summary. No pre-amble.\",\n", + " ),\n", + " (\"human\", community_template),\n", + " ]\n", + ")\n", + "\n", + "community_chain = community_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def prepare_string(data):\n", + " nodes_str = \"Nodes are:\\n\"\n", + " for node in data['nodes']:\n", + " node_id = node['id']\n", + " node_type = node['type']\n", + " if 'description' in node and node['description']:\n", + " node_description = f\", description: {node['description']}\"\n", + " else:\n", + " node_description = \"\"\n", + " nodes_str += f\"id: {node_id}, type: {node_type}{node_description}\\n\"\n", + "\n", + " rels_str = \"Relationships are:\\n\"\n", + " for rel in data['rels']:\n", + " start = rel['start']\n", + " end = rel['end']\n", + " rel_type = rel['type']\n", + " if 'description' in rel and rel['description']:\n", + " description = f\", description: {rel['description']}\"\n", + " else:\n", + " description = \"\"\n", + " rels_str += f\"({start})-[:{rel_type}]->({end}){description}\\n\"\n", + "\n", + " return nodes_str + \"\\n\" + rels_str\n", + "\n", + "def process_community(community):\n", + 
" stringify_info = prepare_string(community)\n", + " summary = community_chain.invoke({'community_info': stringify_info})\n", + " return {\"community\": community['communityId'], \"summary\": summary}" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes are:\n", + "id: India, type: Country, description: Officially the Republic of India, located in South Asia.\n", + "id: Supreme Court Of India, type: Judiciary, description: Head of the independent judiciary.\n", + "id: Indian National Congress, type: Political party, description: Dominated Indian politics until 1977.\n", + "id: Sindhu, type: River, description: Name of the Indus River, from which the name India is derived.\n", + "id: Food And Agriculture Organization Of The United Nations, type: Organization, description: Estimates India's forest cover.\n", + "id: 2013 Forest Survey Of India, type: Report, description: States India's forest cover increased to 69.8 million hectares by 2012.\n", + "id: Yajnavalkya Smriti, type: Literature, description: Prohibited the cutting of trees.\n", + "id: Kautalyas Arthashastra, type: Literature, description: Discusses the need for forest administration.\n", + "id: Crown Land (Encroachment) Ordinance, type: Legislation, description: Promulgated in 1840 targeting forests in Britain's Asian colonies.\n", + "id: Indian Forest Act Of 1865, type: Legislation, description: Established the government's claims over forests.\n", + "id: Forest Act Of 1878, type: Legislation, description: Gave the British government control over all wastelands, including forests.\n", + "id: Sir Dietrich Brandis, type: Person, description: Inspector General of Forests in India from 1864 to 1883.\n", + "id: Indian Ocean, type: Ocean, description: Ocean bounded by India on the north.\n", + "id: Sri Lanka, type: Country, description: Country in the vicinity of India in the Indian Ocean.\n", + "id: 
Maldives, type: Country, description: Country in the vicinity of India in the Indian Ocean.\n", + "id: Myanmar, type: Country, description: Country sharing land border with India to the east.\n", + "id: Himalayan Mountain Range, type: Mountain range, description: Defines the northern frontiers of India.\n", + "id: China, type: Country, description: Country sharing land border with India to the north.\n", + "id: Bhutan, type: Country, description: Country sharing land border with India to the north.\n", + "id: Nepal, type: Country, description: Country sharing land border with India to the north.\n", + "id: Pakistan, type: Country, description: Country sharing land border with India to the west.\n", + "id: Bangladesh, type: Country, description: Country sharing land border with India to the east.\n", + "id: Ganges, type: River, description: The longest river originating in India.\n", + "id: Western Ghats, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Eastern Himalayas, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Indo-Burma Hotspot, type: Biodiversity hotspot, description: One of the three biodiversity hotspots in India.\n", + "id: Forests, type: Ecosystem, description: Covers about 24.6% of the total land area.\n", + "id: Indomalayan Realm, type: Realm, description: Geographical realm that includes India.\n", + "id: World, type: None\n", + "id: Public_Sector, type: None\n", + "id: Gdp, type: None\n", + "id: Economic_Liberalisation, type: None\n", + "id: Gdp_Growth, type: None\n", + "id: Consumer_Market, type: None\n", + "id: Wto, type: None\n", + "id: Ease_Of_Business_Index, type: None\n", + "id: Global_Competitiveness_Index, type: None\n", + "id: Billionaires, type: None\n", + "id: Welfare_State, type: None\n", + "id: Socialist_State, type: None\n", + "id: Social_Welfare_Spending, type: None\n", + "id: Labour_Force, type: None\n", + "id: Fdi, type: 
None\n", + "id: Free_Trade_Agreements, type: None\n", + "id: Service_Sector, type: None\n", + "id: Stock_Exchanges, type: None\n", + "id: Manufacturing, type: None\n", + "id: Population, type: None\n", + "id: Unemployment, type: None\n", + "id: Savings_Rate, type: None\n", + "id: Arabian Sea, type: Sea, description: Sea bounded by India on the northwest.\n", + "id: Bay Of Bengal, type: Bay, description: Bay bounded by India on the southeast.\n", + "id: Africa, type: Continent, description: Continent from which modern humans arrived on the Indian subcontinent.\n", + "id: Indus Valley Civilisation, type: Civilization, description: Civilisation that evolved from settled life in the western margins of the Indus river basin.\n", + "id: Sanskrit, type: Language, description: Indo-European language that diffused into India from the northwest.\n", + "id: Rigveda, type: Text, description: Ancient hymns recording the dawning of Hinduism in India.\n", + "id: Hinduism, type: Religion, description: Major religion in India, with roots in the Rigveda.\n", + "id: Buddhism, type: Religion, description: Religion that arose in India, proclaiming social orders unlinked to heredity.\n", + "id: Jainism, type: Religion, description: Religion that arose in India, proclaiming social orders unlinked to heredity.\n", + "id: Mughal Empire, type: Empire, description: Empire that began in 1526, known for its architecture and relative peace.\n", + "id: Indian Republic, type: Government, description: India has been a federal republic since 1950.\n", + "\n", + "Relationships are:\n", + "(India)-[:IS_PART_OF]->(Indomalayan Realm)\n", + "(India)-[:HAS]->(Public_Sector), description: About 1,900 public sector companies with complete control and ownership of railways, highways, and majority control in various industries.\n", + "(India)-[:HAS]->(Billionaires), description: One of the world's highest number of billionaires.\n", + "(India)-[:HAS]->(Labour_Force), description: World's second-largest 
labour force with 586 million workers.\n", + "(India)-[:HAS]->(Free_Trade_Agreements), description: Has free trade agreements with several nations and blocs.\n", + "(India)-[:HAS]->(Stock_Exchanges), description: Bombay Stock Exchange and National Stock Exchange are among the world's largest stock exchanges.\n", + "(India)-[:HAS]->(Population), description: Nearly 65% of India's population is rural.\n", + "(India)-[:RELATES_TO]->(Rigveda)\n", + "(India)-[:MEMBER_OF]->(Wto), description: Member of the World Trade Organization since 1 January 1995.\n", + "(India)-[:FACES]->(Unemployment), description: High unemployment and rising income inequality.\n", + "(India)-[:ADOPTED]->(Economic_Liberalisation), description: Adopted broad economic liberalisation in 1991.\n", + "(India)-[:CONSIDERED_AS]->(Welfare_State), description: Often considered a welfare state.\n", + "(India)-[:NEAR]->(Sri Lanka)\n", + "(India)-[:NEAR]->(Maldives)\n", + "(India)-[:BORDERS]->(Indian Ocean)\n", + "(India)-[:BORDERS]->(Myanmar)\n", + "(India)-[:BORDERS]->(Himalayan Mountain Range)\n", + "(India)-[:BORDERS]->(China)\n", + "(India)-[:BORDERS]->(Bhutan)\n", + "(India)-[:BORDERS]->(Nepal)\n", + "(India)-[:BORDERS]->(Pakistan)\n", + "(India)-[:BORDERS]->(Bangladesh)\n", + "(India)-[:BORDERS]->(Arabian Sea)\n", + "(India)-[:BORDERS]->(Bay Of Bengal)\n", + "(India)-[:SEPARATED_BY]->(Sri Lanka)\n", + "(India)-[:DERIVED_FROM]->(Sindhu)\n", + "(India)-[:ESTIMATES]->(Food And Agriculture Organization Of The United Nations), description: Estimates India's forest cover.\n", + "(India)-[:MAKES_UP]->(Service_Sector), description: Makes up more than 50% of GDP.\n", + "(India)-[:RANKS_AS]->(World), description: Fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP).\n", + "(India)-[:RANKS_AS]->(Consumer_Market), description: Fourth-largest consumer market in the world.\n", + "(India)-[:RANKS_AS]->(Manufacturing), description: World's sixth-largest manufacturer, representing 
2.6% of global manufacturing output.\n", + "(India)-[:DECLARED_AS]->(Socialist_State), description: Officially declared a socialist state as per the constitution.\n", + "(India)-[:ORIGIN_OF]->(Hinduism)\n", + "(India)-[:ORIGIN_OF]->(Buddhism)\n", + "(India)-[:ORIGIN_OF]->(Jainism)\n", + "(India)-[:GOVERNED_AS]->(Indian Republic)\n", + "(India)-[:RANKS]->(Gdp), description: 136th by GDP (nominal) and 125th by GDP (PPP) on a per capita income basis.\n", + "(India)-[:RANKS]->(Ease_Of_Business_Index), description: 63rd on the Ease of Doing Business index.\n", + "(India)-[:RANKS]->(Global_Competitiveness_Index), description: 40th on the Global Competitiveness Index.\n", + "(India)-[:SHARES_BORDERS_WITH]->(Myanmar)\n", + "(India)-[:SPENDING]->(Social_Welfare_Spending), description: Overall social welfare spending stood at 8.6% of GDP in 2021-22.\n", + "(India)-[:HISTORICAL_EVENT]->(Mughal Empire)\n", + "(India)-[:ORIGINATES_IN]->(Ganges)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Western Ghats)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Eastern Himalayas)\n", + "(India)-[:HAS_BIODIVERSITY_HOTSPOT]->(Indo-Burma Hotspot)\n", + "(India)-[:HAS_ECOSYSTEM]->(Forests)\n", + "(India)-[:AVERAGE_ANNUAL_GROWTH]->(Gdp_Growth), description: 6% to 7% since the start of the 21st century.\n", + "(India)-[:FOREIGN_DIRECT_INVESTMENT]->(Fdi), description: Foreign direct investment in 2021-22 was $82 billion.\n", + "(India)-[:GROSS_DOMESTIC_SAVINGS_RATE]->(Savings_Rate), description: Gross domestic savings rate stood at 29.3% of GDP in 2022.\n", + "(India)-[:HUMAN_ORIGIN]->(Africa)\n", + "(India)-[:HISTORICAL_DEVELOPMENT]->(Indus Valley Civilisation)\n", + "(India)-[:LANGUAGE_DIFFUSION]->(Sanskrit)\n", + "(Kautalyas Arthashastra)-[:DISCUSSES]->(India), description: Discusses the need for forest administration.\n", + "(Indian Forest Act Of 1865)-[:ESTABLISHES]->(India), description: Establishes government's claims over forests.\n", + "(Crown Land (Encroachment) 
Ordinance)-[:TARGETS]->(India), description: Targets forests in Britain's Asian colonies.\n", + "(Yajnavalkya Smriti)-[:PROHIBITS]->(India), description: Prohibits the cutting of trees.\n", + "(2013 Forest Survey Of India)-[:REPORTS_ON]->(India), description: Reports on forest cover increase.\n", + "(Supreme Court Of India)-[:PART_OF_JUDICIARY]->(India)\n", + "(Indian National Congress)-[:HISTORICALLY_DOMINANT_PARTY]->(India)\n", + "(Forest Act Of 1878)-[:GIVES_CONTROL]->(India), description: Gives control over all wastelands, including forests.\n", + "(Sir Dietrich Brandis)-[:INSPECTOR_GENERAL_OF]->(India), description: Inspector General of Forests in India from 1864 to 1883.\n", + "\n" + ] + } + ], + "source": [ + "print(prepare_string(community_info[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing communities: 0%| | 0/30 [00:00 pd.DataFrame:\n", + " \"\"\"Executes a Cypher statement and returns a DataFrame\"\"\"\n", + " return driver.execute_query(\n", + " cypher, parameters_=params, result_transformer_=Result.to_df\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Calculate community weights before local/global search: weight = number of distinct nodes linked via HAS_ENTITY to the community's entities (the retrieval query orders communities by this weight)\n", + "db_query(\n", + " \"\"\"\n", + "MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c)\n", + "WITH n, count(distinct c) AS chunkCount\n", + "SET n.weight = chunkCount\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Local and global search" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "index_name = \"entity_local\"\n", + "\n", + "# db_query(\n", + "# \"\"\"\n", + "# CREATE VECTOR INDEX \"\"\"\n", + "# + index_name\n", + "# + \"\"\" IF NOT EXISTS FOR (e:__Entity__) ON e.embedding\n", + "# OPTIONS {indexConfig: {\n", + "# `vector.dimensions`: 384,\n", + "#
`vector.similarity_function`: 'cosine'\n", + "# }}\n", + "# \"\"\"\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "topChunks = 3\n", + "topCommunities = 3\n", + "topOutsideRels = 10\n", + "topInsideRels = 10\n", + "topEntities = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "lc_retrieval_query = \"\"\"\n", + "WITH collect(node) as nodes\n", + "// Entity - Text Unit Mapping\n", + "WITH\n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)<-[:HAS_ENTITY]->(c:Chunk)\n", + " WITH c, count(distinct n) as freq\n", + " RETURN c.text AS chunkText\n", + " ORDER BY freq DESC\n", + " LIMIT $topChunks\n", + "} AS text_mapping,\n", + "// Entity - Report Mapping\n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[:IN_COMMUNITY]->(c:__Community__)\n", + " WITH c, c.rank as rank, c.weight AS weight\n", + " RETURN c.summary \n", + " ORDER BY rank, weight DESC\n", + " LIMIT $topCommunities\n", + "} AS report_mapping,\n", + "// Outside Relationships \n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[r:RELATED]-(m) \n", + " WHERE NOT m IN nodes\n", + " RETURN r.description AS descriptionText\n", + " ORDER BY r.rank, r.weight DESC \n", + " LIMIT $topOutsideRels\n", + "} as outsideRels,\n", + "// Inside Relationships \n", + "collect {\n", + " UNWIND nodes as n\n", + " MATCH (n)-[r:RELATED]-(m) \n", + " WHERE m IN nodes\n", + " RETURN r.description AS descriptionText\n", + " ORDER BY r.rank, r.weight DESC \n", + " LIMIT $topInsideRels\n", + "} as insideRels,\n", + "// Entities description\n", + "collect {\n", + " UNWIND nodes as n\n", + " RETURN n.description AS descriptionText\n", + "} as entities\n", + "// We don't have covariates or claims here\n", + "RETURN {Chunks: text_mapping, Reports: report_mapping, \n", + " Relationships: outsideRels + insideRels, \n", + " Entities: entities} AS text, 1.0 AS score, {} AS 
metadata\n", + "\"\"\"\n", + "\n", + "embeddings, dimension = load_embedding_model(\"sentence_transformer\")\n", + "lc_vector = Neo4jVector.from_existing_index(\n", + " embeddings,\n", + " url=os.environ[\"NEO4J_URI\"],\n", + " username=os.environ[\"NEO4J_USERNAME\"],\n", + " password=os.environ[\"NEO4J_PASSWORD\"],\n", + " index_name=index_name,\n", + " retrieval_query=lc_retrieval_query,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "docs = lc_vector.similarity_search_with_score(\n", + " \"Major cities of India\",\n", + " k=topEntities,\n", + " params={\n", + " \"topChunks\": topChunks,\n", + " \"topCommunities\": topCommunities,\n", + " \"topOutsideRels\": topOutsideRels,\n", + " \"topInsideRels\": topInsideRels,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Entities:\n", + "- None\n", + "- None\n", + "- None\n", + "- One of the three biodiversity hotspots in India.\n", + "- Officially the Republic of India, located in South Asia.\n", + "- Geographical realm that includes India.\n", + "- One of the three biodiversity hotspots in India.\n", + "- Defines the northern frontiers of India.\n", + "- One of the three biodiversity hotspots in India.\n", + "- Commonly used name for the Republic of India.\n", + "Reports:\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. 
India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. 
The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. 
The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "- India, officially the Republic of India, is a country located in South Asia and is part of the Indomalayan Realm. It shares borders with several countries including China, Bhutan, Nepal, Pakistan, Bangladesh, and Myanmar, and is bounded by the Indian Ocean, Arabian Sea, and Bay of Bengal. India is known for its rich biodiversity, featuring hotspots like the Western Ghats, Eastern Himalayas, and Indo-Burma Hotspot, and has a significant forest cover, which has been increasing as reported by the 2013 Forest Survey of India.\n", + "\n", + "India has a complex history, with ancient civilizations like the Indus Valley Civilisation and significant cultural contributions such as the Rigveda, which marks the dawn of Hinduism. It is also the birthplace of Buddhism and Jainism. The country has a diverse linguistic heritage, including the diffusion of the Indo-European language Sanskrit.\n", + "\n", + "Economically, India is a major global player, ranking as the fifth-largest economy by nominal GDP and third-largest by purchasing power parity (PPP). It has a significant public sector, a large labor force, and is a member of the World Trade Organization. The country has adopted broad economic liberalization since 1991, leading to substantial GDP growth and a robust service sector that makes up more than 50% of its GDP. 
India also has a high number of billionaires and is considered a welfare state with significant social welfare spending.\n", + "\n", + "Politically, India has been a federal republic since 1950, with the Indian National Congress historically dominating its politics until 1977. The Supreme Court of India heads its independent judiciary. The country has a rich legislative history concerning forest administration, with significant acts like the Indian Forest Act of 1865 and the Forest Act of 1878, and contributions from figures like Sir Dietrich Brandis, the Inspector General of Forests from 1864 to 1883.\n", + "\n", + "India's geographical and cultural landscape is further enriched by its rivers, such as the Ganges and Sindhu (Indus River), and its proximity to neighboring countries like Sri Lanka and the Maldives. The country is also part of the larger Indomalayan Realm and has historical ties to Africa, from where modern humans arrived on the Indian subcontinent.\n", + "Chunks:\n", + "- India is one of the most biodiverse regions and is home to a large variety of wildlife. It is one of the 17 megadiverse countries and includes three of the worlds 36 biodiversity hotspots – the Western Ghats, the Eastern Himalayas, and the Indo-Burma hotspot. About 24.6% of the total land area is covered by forests. It has various ecosystems ranging from the high altitude Himalayas, tropical evergreen forests along the Western Ghats, desert in the north-west, coastal plains and mangroves along the peninsular region. India lies within the Indomalayan realm and is home to about 7.6% of mammal, 14.7% of amphibian, 6% of bird, 6.2% of reptilian, and 6.2% of flowering plant species. Human encroachment, deforestation and poaching are significant challenges that threaten the existence of certain fauna and flora. Government of India established a system of national parks and\n", + "- threaten the existence of certain fauna and flora. 
Government of India established a system of national parks and protected areas in 1935, which have been subsequently expanded to nearly 1022 protected areas by 2023. India has enacted the Wildlife Protection Act of 1972 and special projects such as Project Tiger, Project Elephant and Project Dolphin for protection of critical species. == Fauna == India has an estimated 92,873 species of fauna, roughly about 7.5% of the species available worldwide. Insects form the major category with 63423 recorded species. India is home to 423 mammals, 1233 birds, 526 reptiles, 342 amphibians, 3022 fish apart from other species which form 7.6% of mammal, 14.7% of amphibian, 6% of bird, 6.2% of reptilian species worldwide. Many Indian species are descendants of species originating in Gondwana, of which India originally was a part. Peninsular Indias subsequent movement towards\n", + "- Gondwana, of which India originally was a part. Peninsular Indias subsequent movement towards, and collision with, the Laurasian landmass set off a mass exchange of species. However, volcanism in the Deccan Traps and climatic change 20 million years ago caused the extinction of many endemic Indian forms. Soon thereafter, mammals entered India from Asia through two zoogeographical passes on either side of the emerging Himalayas. As a result, among Indian species, only 12.6% of mammals and 4.5% of birds are endemic, contrasting with 45.8% of reptiles and 55.8% of amphibians India is home to several well-known large animals, including the Indian elephant, Indian rhinoceros, and Gaur. India is the only country where the big cats tiger and lion exist in the wild. 
Members of the cat family include Bengal tiger, Asiatic lion, Indian leopard, snow leopard, and clouded\n", + "Relationships:\n", + "\n" + ] + } + ], + "source": [ + "print(docs[0][0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "model = \"openai-gpt-4o\"\n", + "llm, model_name = get_llm(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "MAP_SYSTEM_PROMPT = \"\"\"\n", + "---Role---\n", + "\n", + "You are a helpful assistant responding to questions about data in the tables provided.\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.\n", + "\n", + "You should use the data provided in the data tables below as the primary context for generating the response.\n", + "If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up.\n", + "\n", + "Each key point in the response should have the following element:\n", + "- Description: A comprehensive description of the point.\n", + "- Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. 
An 'I don't know' type of response should have a score of 0.\n", + "\n", + "The response should be JSON formatted as follows:\n", + "{{\n", + " \"points\": [\n", + " {{\"description\": \"Description of point 1 [Data: Reports (report ids)]\", \"score\": score_value}},\n", + " {{\"description\": \"Description of point 2 [Data: Reports (report ids)]\", \"score\": score_value}}\n", + " ]\n", + "}}\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "Points supported by data should list the relevant reports as references as follows:\n", + "\"This is an example sentence supported by data references [Data: Reports (report ids)]\"\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Data tables---\n", + "\n", + "{context_data}\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.\n", + "\n", + "You should use the data provided in the data tables below as the primary context for generating the response.\n", + "If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. 
Do not make anything up.\n", + "\n", + "Each key point in the response should have the following element:\n", + "- Description: A comprehensive description of the point.\n", + "- Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "Points supported by data should list the relevant reports as references as follows:\n", + "\"This is an example sentence supported by data references [Data: Reports (report ids)]\"\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "The response should be JSON formatted as follows:\n", + "{{\n", + " \"points\": [\n", + " {{\"description\": \"Description of point 1 [Data: Reports (report ids)]\", \"score\": score_value}},\n", + " {{\"description\": \"Description of point 2 [Data: Reports (report ids)]\", \"score\": score_value}}\n", + " ]\n", + "}}\n", + "\"\"\"\n", + "\n", + "map_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " MAP_SYSTEM_PROMPT,\n", + " ),\n", + " (\n", + " \"human\",\n", + " \"{question}\",\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "map_chain = map_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "REDUCE_SYSTEM_PROMPT = \"\"\"\n", + "---Role---\n", + "\n", + "You are a helpful assistant responding to questions about a dataset by synthesizing perspectives from multiple analysts.\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset.\n", + "\n", + "Note that the analysts' reports provided below are ranked in the **descending order of importance**.\n", + "\n", + "If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. 
Do not make anything up.\n", + "\n", + "The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format.\n", + "\n", + "Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process.\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Target response length and format---\n", + "\n", + "{response_type}\n", + "\n", + "\n", + "---Analyst Reports---\n", + "\n", + "{report_data}\n", + "\n", + "\n", + "---Goal---\n", + "\n", + "Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset.\n", + "\n", + "Note that the analysts' reports provided below are ranked in the **descending order of importance**.\n", + "\n", + "If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. Do not make anything up.\n", + "\n", + "The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format.\n", + "\n", + "The response shall preserve the original meaning and use of modal verbs such as \"shall\", \"may\" or \"will\".\n", + "\n", + "The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process.\n", + "\n", + "**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n", + "\n", + "For example:\n", + "\n", + "\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]\"\n", + "\n", + "where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n", + "\n", + "Do not include information where the supporting evidence for it is not provided.\n", + "\n", + "\n", + "---Target response length and format---\n", + "\n", + "{response_type}\n", + "\n", + "Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.\n", + "\"\"\"\n", + "\n", + "reduce_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " REDUCE_SYSTEM_PROMPT,\n", + " ),\n", + " (\n", + " \"human\",\n", + " \"{question}\",\n", + " ),\n", + " ]\n", + ")\n", + "reduce_chain = reduce_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "graph = Neo4jGraph(\n", + " url=os.environ[\"NEO4J_URI\"],\n", + " username=os.environ[\"NEO4J_USERNAME\"],\n", + " password=os.environ[\"NEO4J_PASSWORD\"],\n", + " refresh_schema=False,\n", + ")\n", + "\n", + "response_type: str = \"multiple paragraphs\"\n", + "\n", + "\n", + "def global_retriever(query: str, level: int, response_type: str = response_type) -> str:\n", + " community_data = graph.query(\n", + " \"\"\"\n", + " MATCH (c:__Community__)\n", + " WHERE c.level = $level\n", + " RETURN c.full_content AS output\n", + " \"\"\",\n", + " params={\"level\": level},\n", + " )\n", + " intermediate_results = []\n", + " for community in tqdm(community_data, desc=\"Processing communities\"):\n", + " intermediate_response = map_chain.invoke(\n", + " {\"question\": query, \"context_data\": community[\"output\"]}\n", + " )\n", + " intermediate_results.append(intermediate_response)\n", + " final_response = reduce_chain.invoke(\n", + " {\n", + " \"report_data\": intermediate_results,\n", + " \"question\": query,\n", + " \"response_type\": response_type,\n", + " }\n", + " )\n", + " 
return final_response" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownPropertyKeyWarning} {category: UNRECOGNIZED} {title: The provided property key is not in the database} {description: One of the property names in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing property name is: full_content)} {position: line: 4, column: 14, offset: 69} for query: '\\n MATCH (c:__Community__)\\n WHERE c.level = $level\\n RETURN c.full_content AS output\\n '\n", + "Processing communities: 0it [00:00, ?it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I'm sorry, but the provided reports do not contain sufficient information to provide an answer to your question about the major cities of India. 
If you have any other questions or need information on a different topic, please let me know!\n" + ] + } + ], + "source": [ + "print(global_retriever(\"Major cities of India?\", 2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chatbotenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/frontend/src/App.css b/frontend/src/App.css index fe285c972..e912a05e2 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -121,7 +121,8 @@ height: 55px; object-fit: contain; } -.webImg{ + +.webImg { width: 80px; height: 80px; } @@ -240,9 +241,11 @@ .ndl-widget-content>div { overflow-wrap: break-word } -.word-break{ + +.word-break { word-break: break-word; } + .cellClass { width: 100%; height: 100%; @@ -345,22 +348,28 @@ margin-bottom: 0 !important; } -.node_label__value-container--has-value ,.relationship_label__value-container--has-value{ +.node_label__value-container--has-value, +.relationship_label__value-container--has-value { max-height: 215px; overflow-y: scroll !important; scrollbar-width: thin; } -.entity_extraction_Tab_node_label__value-container--has-value,.entity_extraction_Tab_relationship_label__value-container--has-value{ + +.entity_extraction_Tab_node_label__value-container--has-value, +.entity_extraction_Tab_relationship_label__value-container--has-value { max-height: 100px; overflow-y: scroll !important; scrollbar-width: thin; } -.tablet_entity_extraction_Tab_node_label__value-container--has-value,.tablet_entity_extraction_Tab_relationship_label__value-container--has-value{ + 
+.tablet_entity_extraction_Tab_node_label__value-container--has-value, +.tablet_entity_extraction_Tab_relationship_label__value-container--has-value { max-height: 80px; overflow-y: scroll !important; scrollbar-width: thin; } -.widthunset{ + +.widthunset { width: initial !important; height: initial !important; } @@ -372,4 +381,9 @@ .text-input-container.search-initiated { width: 60dvh; +} + +.custom-menu { + min-width: 250px; + max-width: 305px; } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 3c6642665..53172256a 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -1,44 +1,37 @@ import { Box, Typography, - TextLink, Flex, Tabs, - LoadingSpinner, CypherCodeBlock, CypherCodeBlockProps, useCopyToClipboard, Banner, useMediaQuery, } from '@neo4j-ndl/react'; -import { DocumentDuplicateIconOutline, DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; +import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; -import wikipedialogo from '../../assets/images/wikipedia.svg'; -import youtubelogo from '../../assets/images/youtube.svg'; -import gcslogo from '../../assets/images/gcs.webp'; -import s3logo from '../../assets/images/s3logo.png'; import { Chunk, + Community, Entity, ExtendedNode, ExtendedRelationship, - GroupedEntity, UserCredentials, chatInfoMessage, } from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; -import HoverableLink from '../UI/HoverableLink'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import { calcWordColor } from '@neo4j-devtools/word-color'; -import 
ReactMarkdown from 'react-markdown'; -import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; -import { parseEntity, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; -import { ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import { tokens } from '@neo4j-ndl/base'; +import ChunkInfo from './ChunkInfo'; +import EntitiesInfo from './EntitiesInfo'; +import SourcesInfo from './SourcesInfo'; +import CommunitiesInfo from './Communities'; +import { chatModeLables } from '../../utils/Constants'; const ChatInfoModal: React.FC = ({ sources, @@ -53,8 +46,9 @@ const ChatInfoModal: React.FC = ({ }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [activeTab, setActiveTab] = useState(error.length ? 10 : mode === 'graph' ? 4 : 3); + const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === chatModeLables.graph ? 
4 : 3); const [infoEntities, setInfoEntities] = useState([]); + const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); const [nodes, setNodes] = useState([]); @@ -86,73 +80,79 @@ const ChatInfoModal: React.FC = ({ ], [copiedText, cypher_query] ); + useEffect(() => { - if (mode != 'graph' || error?.trim() !== '') { - setLoading(true); - chunkEntitiesAPI(userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(',')) - .then((response) => { - setInfoEntities(response.data.data.nodes); - setNodes(response.data.data.nodes); - setRelationships(response.data.data.relationships); - const chunks = response.data.data.chunk_data.map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return { - ...chunk, - score: chunkScore?.score, - }; - }); - const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); - setChunks(sortedchunks); + if (mode != chatModeLables.graph || error?.trim() !== '') { + (async () => { + setLoading(true); + try { + const response = await chunkEntitiesAPI( + userCredentials as UserCredentials, + chunk_ids.map((c) => c.id).join(','), + userCredentials?.database, + mode === chatModeLables.entity_vector + ); + if (response.data.status === 'Failure') { + throw new Error(response.data.error); + } + const nodesData = response?.data?.data?.nodes; + const relationshipsData = response?.data?.data?.relationships; + const communitiesData = response?.data?.data?.community_data; + const chunksData = response?.data?.data?.chunk_data; + + setInfoEntities( + nodesData.map((n: Entity) => { + if (!n.labels.length && mode === chatModeLables.entity_vector) { + return { + ...n, + labels: ['Entity'], + }; + } + return n; + }) + ); + setNodes( + nodesData.map((n: ExtendedNode) => { + if (!n.labels.length && mode === chatModeLables.entity_vector) { + return { + ...n, + labels: ['Entity'], + }; + } + return n ?? 
[]; + }) + ); + setRelationships(relationshipsData ?? []); + setCommunities(communitiesData ?? []); + setChunks( + chunksData + .map((chunk: any) => { + const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); + return ( + { + ...chunk, + score: chunkScore?.score, + } ?? [] + ); + }) + .sort((a: any, b: any) => b.score - a.score) + ); setLoading(false); - }) - .catch((error) => { - console.error('Error fetching entities:', error); + } catch (error) { + console.error('Error fetching information:', error); setLoading(false); - }); + } + })(); } - () => { setcopiedText(false); }; }, [chunk_ids, mode, error]); - const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - return infoEntities.reduce((acc, entity) => { - const { label, text } = parseEntity(entity); - if (!acc[label]) { - const newColor = calcWordColor(label); - acc[label] = { texts: new Set(), color: newColor }; - } - acc[label].texts.add(text); - return acc; - }, {} as Record; color: string }>); - }, [infoEntities]); + const onChangeTabs = (tabId: number) => { setActiveTab(tabId); }; - const labelCounts = useMemo(() => { - const counts: { [label: string]: number } = {}; - for (let index = 0; index < infoEntities.length; index++) { - const entity = infoEntities[index]; - const { labels } = entity; - const [label] = labels; - counts[label] = counts[label] ? counts[label] + 1 : 1; - } - return counts; - }, [infoEntities]); - const sortedLabels = useMemo(() => { - return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); - }, [labelCounts]); - const generateYouTubeLink = (url: string, startTime: string) => { - try { - const urlObj = new URL(url); - urlObj.searchParams.set('t', startTime); - return urlObj.toString(); - } catch (error) { - console.error('Invalid URL:', error); - return ''; - } - }; return ( @@ -175,257 +175,38 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode != 'graph' ? 
Sources used : <>} - {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? ( + {mode != chatModeLables.graph ? Sources used : <>} + {mode != chatModeLables.graph ? Chunks : <>} + {mode === chatModeLables.graph_vector || + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> )} - {mode === 'graph' && cypher_query?.trim().length ? ( + {mode === chatModeLables.graph && cypher_query?.trim()?.length ? ( Generated Cypher Query ) : ( <> )} - {mode != 'graph' ? Chunks : <>} + {mode === chatModeLables.entity_vector? Communities : <>} )} - {sources.length ? ( -
      - {sources.map((link, index) => { - return ( -
    • - {link?.startsWith('http') || link?.startsWith('https') ? ( - <> - {link?.includes('wikipedia.org') && ( -
      - Wikipedia Logo - - - - {link} - - - -
      - )} - {link?.includes('storage.googleapis.com') && ( -
      - Google Cloud Storage Logo - - {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} - -
      - )} - {youtubeLinkValidation(link) && ( - <> -
      - - - - - {link} - - - -
      - - )} - {!link?.startsWith('s3://') && - !link?.includes('storage.googleapis.com') && - !link?.includes('wikipedia.org') && - !link?.includes('youtube.com') && ( -
      - - - {link} - -
      - )} - - ) : link?.startsWith('s3://') ? ( -
      - S3 Logo - - {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} - -
      - ) : ( -
      - - - {link} - -
      - )} -
    • - ); - })} -
    - ) : ( - No Sources Found - )} +
    - {loading ? ( - - - - ) : Object.keys(groupedEntities).length > 0 || Object.keys(graphonly_entities).length > 0 ? ( -
      - {mode == 'graph' - ? graphonly_entities.map((label, index) => ( -
    • -
      - { - // @ts-ignore - label[Object.keys(label)[0]].id ?? Object.keys(label)[0] - } -
      -
    • - )) - : sortedLabels.map((label, index) => ( -
    • -
      - {label} ({labelCounts[label]}) -
      - - {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} - -
    • - ))} -
    - ) : ( - No Entities Found - )} +
    - {loading ? ( - - - - ) : chunks.length > 0 ? ( -
    -
      - {chunks.map((chunk) => ( -
    • - {chunk?.page_number ? ( - <> -
      - - - {/* {chunk?.fileName}, Page: {chunk?.page_number} */} - {chunk?.fileName} - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.start_time ? ( - <> -
      - - - - {chunk?.fileName} - - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && - !chunk?.url.startsWith('s3://') && - !chunk?.url.includes('storage.googleapis.com') && - !chunk?.url.includes('wikipedia.org') && - !chunk?.url.includes('youtube.com') ? ( - <> -
      - - - {chunk?.url} - -
      - Similarity Score: {chunk?.score} - - ) : chunk.fileSource === 'local file' ? ( - <> - Similarity Score: {chunk?.score} - - ) : ( - <> - )} - {chunk?.text} -
    • - ))} -
    -
    - ) : ( - No Chunks Found - )} +
    = ({ className='min-h-40' /> + {mode === chatModeLables.entity_vector ? ( + + + + ) : ( + <> + )}
    - {activeTab == 4 && nodes.length && relationships.length ? ( + {activeTab == 4 && nodes?.length && relationships?.length ? ( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index e82dfea4d..6c9bf459f 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -1,13 +1,14 @@ -import { StatusIndicator } from '@neo4j-ndl/react'; -import { useMemo } from 'react'; +import { StatusIndicator, Typography } from '@neo4j-ndl/react'; +import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; -import { chatModes } from '../../utils/Constants'; +import { chatModeLables, chatModes } from '../../utils/Constants'; import { capitalize } from '@mui/material'; - +import { capitalizeWithPlus } from '../../utils/Utils'; +import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -18,7 +19,70 @@ export default function ChatModeToggle({ anchorPortal?: boolean; disableBackdrop?: boolean; }) { - const { setchatMode, chatMode } = useFileContext(); + const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); + const isCommunityAllowed = postProcessingTasks.includes('create_communities'); + const { isGdsActive } = useCredentials(); + + useEffect(() => { + if (selectedRows.length !== 0) { + setchatMode(chatModeLables.graph_vector); + } else { + setchatMode(chatModeLables.graph_vector_fulltext); + } + }, [selectedRows]); + + const memoizedChatModes = useMemo(() => { + return isGdsActive && isCommunityAllowed + ? 
chatModes + : chatModes?.filter((m) => !m.mode.includes(chatModeLables.entity_vector)); + }, [isGdsActive, isCommunityAllowed]); + const menuItems = useMemo(() => { + return memoizedChatModes?.map((m) => { + const isDisabled = Boolean(selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector)); + const handleModeChange = () => { + if (isDisabled) { + setchatMode(chatModeLables.graph_vector); + } else { + setchatMode(m.mode); + } + closeHandler(); + }; + return { + title: ( +
    + + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} + +
    + {m.description} +
    +
    + ), + onClick: handleModeChange, + disabledCondition: isDisabled, + description: ( + + {chatMode === m.mode && ( + <> + {chatModeLables.selected} + + )} + {isDisabled && ( + <> + {chatModeLables.unavailableChatMode} + + )} + + ), + }; + }); + }, [chatMode, memoizedChatModes, setchatMode, closeHandler, selectedRows]); + + useEffect(() => { + if (!selectedRows.length && !chatMode) { + setchatMode(chatMode); + } + }, [setchatMode, selectedRows, chatMode]); return ( - chatModes?.map((m) => { - return { - title: m.includes('+') - ? m - .split('+') - .map((s) => capitalize(s)) - .join('+') - : capitalize(m), - onClick: () => { - setchatMode(m); - }, - disabledCondition: false, - description: ( - - {chatMode === m && ( - <> - Selected - - )} - - ), - }; - }), - [chatMode, chatModes] - )} - > + items={menuItems} + /> ); } diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 7352b0ff4..412f485ed 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -14,8 +14,8 @@ import { v4 as uuidv4 } from 'uuid'; import { useFileContext } from '../../context/UsersFiles'; import clsx from 'clsx'; import ReactMarkdown from 'react-markdown'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; -import { buttonCaptions, tooltips } from '../../utils/Constants'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; +import { buttonCaptions, chatModeLables, tooltips } from '../../utils/Constants'; import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; import FallBackDialog from '../UI/FallBackDialog'; @@ -44,7 +44,7 @@ const Chatbot: FC = (props) => { const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); const [copyMessageId, setCopyMessageId] = useState(null); - const [chatsMode, setChatsMode] = useState('graph+vector'); + const [chatsMode, setChatsMode] 
= useState(chatModeLables.graph_vector); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); @@ -89,6 +89,7 @@ const Chatbot: FC = (props) => { cypher_query?: string; graphonly_entities?: []; error?: string; + entitiysearchonly_entities?: chunk[]; }, index = 0 ) => { @@ -119,6 +120,7 @@ const Chatbot: FC = (props) => { cypher_query: response?.cypher_query, graphonly_entities: response?.graphonly_entities, error: response.error, + entitiysearchonly_entities: response.entitiysearchonly_entities, }, ]); } else { @@ -141,6 +143,7 @@ const Chatbot: FC = (props) => { lastmsg.cypher_query = response.cypher_query; lastmsg.graphonly_entities = response.graphonly_entities; lastmsg.error = response.error; + lastmsg.entities = response.entitiysearchonly_entities; return msgs.map((msg, index) => { if (index === msgs.length - 1) { return lastmsg; @@ -174,6 +177,7 @@ const Chatbot: FC = (props) => { let cypher_query; let graphonly_entities; let error; + let entitiysearchonly_entities; const datetime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime }; setListMessages([...listMessages, userMessage]); @@ -198,6 +202,7 @@ const Chatbot: FC = (props) => { chatingMode = chatresponse?.data?.data?.info?.mode; cypher_query = chatresponse?.data?.data?.info?.cypher_query ?? ''; graphonly_entities = chatresponse?.data.data.info.context ?? []; + entitiysearchonly_entities = chatresponse?.data.data.info.entities; error = chatresponse.data.data.info.error ?? ''; const finalbotReply = { reply: chatbotReply, @@ -212,6 +217,7 @@ const Chatbot: FC = (props) => { cypher_query, graphonly_entities, error, + entitiysearchonly_entities, }; simulateTypingEffect(finalbotReply); } catch (error) { @@ -349,7 +355,9 @@ const Chatbot: FC = (props) => { setModelModal(chat.model ?? ''); setSourcesModal(chat.sources ?? 
[]); setResponseTime(chat.response_time ?? 0); - setChunkModal(chat.chunk_ids ?? []); + setChunkModal( + chat.mode === 'entity search+vector' ? chat.entities ?? [] : chat.chunk_ids ?? [] + ); setTokensUsed(chat.total_tokens ?? 0); setcypherQuery(chat.cypher_query ?? ''); setShowInfoModal(true); diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx new file mode 100644 index 000000000..350b41b98 --- /dev/null +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -0,0 +1,125 @@ +import { FC, useContext } from 'react'; +import { ChunkProps } from '../../types'; +import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; +import wikipedialogo from '../../assets/images/wikipedia.svg'; +import youtubelogo from '../../assets/images/youtube.svg'; +import gcslogo from '../../assets/images/gcs.webp'; +import s3logo from '../../assets/images/s3logo.png'; +import ReactMarkdown from 'react-markdown'; +import { generateYouTubeLink, getLogo, isAllowedHost } from '../../utils/Utils'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; + +const ChunkInfo: FC = ({ loading, chunks }) => { + const themeUtils = useContext(ThemeWrapperContext); + + return ( + <> + {loading ? ( + + + + ) : chunks?.length > 0 ? ( +
    +
      + {chunks.map((chunk) => ( +
    • + {chunk?.page_number ? ( + <> +
      + + + {chunk?.fileName} + +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.start_time ? ( + <> +
      + + + + {chunk?.fileName} + + +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( + <> +
      + + {chunk?.fileName} +
      + Similarity Score: {chunk?.score} + + ) : chunk?.url && + !chunk?.url.startsWith('s3://') && + !isAllowedHost(chunk?.url, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) ? ( + <> +
      + + + {chunk?.url} + +
      + Similarity Score: {chunk?.score} + + ) : ( + <> +
      + {chunk.fileSource === 'local file' ? ( + + ) : ( + + )} + + {chunk.fileName} + +
      + + )} + {chunk?.text} +
    • + ))} +
    +
    + ) : ( + No Chunks Found + )} + + ); +}; + +export default ChunkInfo; diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx new file mode 100644 index 000000000..9b530fd0f --- /dev/null +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -0,0 +1,36 @@ +import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; +import { FC } from 'react'; +import ReactMarkdown from 'react-markdown'; +import { CommunitiesProps } from '../../types'; + +const CommunitiesInfo: FC = ({ loading, communities }) => { + return ( + <> + {loading ? ( + + + + ) : communities?.length > 0 ? ( +
    +
      + {communities.map((community, index) => ( +
    • +
      + + ID : + {community.id} + + {community.summary} +
      +
    • + ))} +
    +
    + ) : ( + No Communities Found + )} + + ); +}; + +export default CommunitiesInfo; diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx new file mode 100644 index 000000000..6c6e0784e --- /dev/null +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -0,0 +1,91 @@ +import { Box, GraphLabel, LoadingSpinner, Typography } from '@neo4j-ndl/react'; +import { FC, useMemo } from 'react'; +import { EntitiesProps, GroupedEntity } from '../../types'; +import { calcWordColor } from '@neo4j-devtools/word-color'; +import { graphLabels } from '../../utils/Constants'; +import { parseEntity } from '../../utils/Utils'; + +const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, infoEntities }) => { + const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { + const items = infoEntities.reduce((acc, entity) => { + const { label, text } = parseEntity(entity); + if (!acc[label]) { + const newColor = calcWordColor(label); + acc[label] = { texts: new Set(), color: newColor }; + } + acc[label].texts.add(text); + return acc; + }, {} as Record; color: string }>); + return items; + }, [infoEntities]); + + const labelCounts = useMemo(() => { + const counts: { [label: string]: number } = {}; + for (let index = 0; index < infoEntities?.length; index++) { + const entity = infoEntities[index]; + const { labels } = entity; + const [label] = labels; + counts[label] = counts[label] ? counts[label] + 1 : 1; + } + return counts; + }, [infoEntities]); + + const sortedLabels = useMemo(() => { + return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); + }, [labelCounts]); + return ( + <> + {loading ? ( + + + + ) : Object.keys(groupedEntities)?.length > 0 || Object.keys(graphonly_entities)?.length > 0 ? ( +
      + {mode == 'graph' + ? graphonly_entities.map((label, index) => ( +
    • +
      + { + // @ts-ignore + label[Object.keys(label)[0]].id ?? Object.keys(label)[0] + } +
      +
    • + )) + : sortedLabels.map((label, index) => { + const entity = groupedEntities[label == 'undefined' ? 'Entity' : label]; + return ( +
    • + e.preventDefault()} + > + {label === '__Community__' ? graphLabels.community : label} ({labelCounts[label]}) + + + {Array.from(entity.texts).slice(0, 3).join(', ')} + +
    • + ); + })} +
    + ) : ( + No Entities Found + )} + + ); +}; + +export default EntitiesInfo; diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx index 49925466f..465687c4b 100644 --- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -2,7 +2,7 @@ import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; import ChatModeToggle from './ChatModeToggle'; import { Box, IconButton } from '@neo4j-ndl/react'; import { IconProps } from '../../types'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; import { useState } from 'react'; import { RiChatSettingsLine } from 'react-icons/ri'; diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx deleted file mode 100644 index 3a6ef37bb..000000000 --- a/frontend/src/components/ChatBot/Info/InfoModal.tsx +++ /dev/null @@ -1,361 +0,0 @@ -import { Box, Typography, TextLink, Flex, Tabs, LoadingSpinner } from '@neo4j-ndl/react'; -import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; -import '../../../styling/info.css'; -import Neo4jRetrievalLogo from '../../../assets/images/Neo4jRetrievalLogo.png'; -import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; -import youtubelogo from '../../../assets/images/youtube.png'; -import gcslogo from '../../../assets/images/gcs.webp'; -import s3logo from '../../../assets/images/s3logo.png'; - -import { - Chunk, - Entity, - ExtendedNode, - ExtendedRelationship, - GroupedEntity, - UserCredentials, - chatInfoMessage, -} from '../../../types'; -import { useEffect, useMemo, useState } from 'react'; -import HoverableLink from '../../UI/HoverableLink'; -import GraphViewButton from '../../Graph/GraphViewButton'; 
-import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo'; -import { useCredentials } from '../../../context/UserCredentials'; -import { calcWordColor } from '@neo4j-devtools/word-color'; -import ReactMarkdown from 'react-markdown'; -import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; -import { youtubeLinkValidation } from '../../../utils/Utils'; -const InfoModal: React.FC = ({ sources, model, total_tokens, response_time, chunk_ids }) => { - const [activeTab, setActiveTab] = useState(3); - const [infoEntities, setInfoEntities] = useState([]); - const [loading, setLoading] = useState(false); - const { userCredentials } = useCredentials(); - const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); - const [chunks, setChunks] = useState([]); - const parseEntity = (entity: Entity) => { - const { labels, properties } = entity; - const [label] = labels; - const text = properties.id; - return { label, text }; - }; - useEffect(() => { - setLoading(true); - chunkEntitiesAPI(userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(',')) - .then((response) => { - setInfoEntities(response.data.data.nodes); - setNodes(response.data.data.nodes); - setRelationships(response.data.data.relationships); - const chunks = response.data.data.chunk_data.map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return { - ...chunk, - score: chunkScore?.score, - }; - }); - const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); - setChunks(sortedchunks); - setLoading(false); - }) - .catch((error) => { - console.error('Error fetching entities:', error); - setLoading(false); - }); - }, [chunk_ids]); - const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - return infoEntities.reduce((acc, entity) => { - const { label, text } = parseEntity(entity); - if (!acc[label]) { - const newColor = calcWordColor(label); - acc[label] = { texts: 
new Set(), color: newColor }; - } - acc[label].texts.add(text); - return acc; - }, {} as Record; color: string }>); - }, [infoEntities]); - const onChangeTabs = (tabId: number) => { - setActiveTab(tabId); - }; - const labelCounts = useMemo(() => { - const counts: { [label: string]: number } = {}; - for (let index = 0; index < infoEntities.length; index++) { - const entity = infoEntities[index]; - const { labels } = entity; - const [label] = labels; - counts[label] = counts[label] ? counts[label] + 1 : 1; - } - return counts; - }, [infoEntities]); - const sortedLabels = useMemo(() => { - return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); - }, [labelCounts]); - - const generateYouTubeLink = (url: string, startTime: string) => { - try { - const urlObj = new URL(url); - urlObj.searchParams.set('t', startTime); - return urlObj.toString(); - } catch (error) { - console.error('Invalid URL:', error); - return ''; - } - }; - return ( - - - - - Retrieval information - - To generate this response, in {response_time} seconds we used{' '} - {total_tokens} tokens with the model{' '} - {model}. - - - - - Sources used - Top Entities used - Chunks - - - - {sources.length ? ( -
      - {sources.map((link, index) => { - return ( -
    • - {link?.startsWith('http') || link?.startsWith('https') ? ( - <> - {link?.includes('wikipedia.org') && ( -
      - Wikipedia Logo - - - - {link} - - - -
      - )} - {link?.includes('storage.googleapis.com') && ( -
      - Google Cloud Storage Logo - - {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} - -
      - )} - {link?.startsWith('s3://') && ( -
      - S3 Logo - - {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} - -
      - )} - {youtubeLinkValidation(link) && ( - <> -
      - - - - - {link} - - - -
      - - )} - {!link?.startsWith('s3://') && - !link?.includes('storage.googleapis.com') && - !link?.includes('wikipedia.org') && - !link?.includes('youtube.com') && ( -
      - - - {link} - -
      - )} - - ) : ( -
      - - - {link} - - {/* {chunks?.length > 0 && ( - - - Page{' '} - {chunks - .map((c) => c.page_number as number) - .sort((a, b) => a - b) - .join(', ')} - - )} */} -
      - )} -
    • - ); - })} -
    - ) : ( - No Sources Found - )} -
    - - {loading ? ( - - - - ) : Object.keys(groupedEntities).length > 0 ? ( -
      - {sortedLabels.map((label, index) => ( -
    • -
      - {label} ({labelCounts[label]}) -
      - - {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} - -
    • - ))} -
    - ) : ( - No Entities Found - )} -
    - - {loading ? ( - - - - ) : chunks.length > 0 ? ( -
    -
      - {chunks.map((chunk) => ( -
    • - {chunk?.page_number ? ( - <> -
      - - - {/* {chunk?.fileName}, Page: {chunk?.page_number} */} - {chunk?.fileName} - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.start_time ? ( - <> -
      - - - - {chunk?.fileName} - - -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( - <> -
      - - {chunk?.fileName} -
      - Similarity Score: {chunk?.score} - - ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( - <> -
      - - {chunk?.fileName} -
      - - ) : chunk?.url && - !chunk?.url.startsWith('s3://') && - !chunk?.url.includes('storage.googleapis.com') && - !chunk?.url.includes('wikipedia.org') && - !chunk?.url.includes('youtube.com') ? ( - <> -
      - - - {chunk?.url} - -
      - Similarity Score: {chunk?.score} - - ) : ( - <> - )} - {chunk?.text} -
    • - ))} -
    -
    - ) : ( - No Chunks Found - )} -
    -
    - {activeTab == 4 && nodes.length && relationships.length ? ( - - - - ) : ( - <> - )} -
    - ); -}; -export default InfoModal; diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx new file mode 100644 index 000000000..91fee511f --- /dev/null +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -0,0 +1,137 @@ +import { FC, useContext } from 'react'; +import { SourcesProps } from '../../types'; +import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; +import { getLogo, isAllowedHost, youtubeLinkValidation } from '../../utils/Utils'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import HoverableLink from '../UI/HoverableLink'; +import wikipedialogo from '../../assets/images/wikipedia.svg'; +import youtubelogo from '../../assets/images/youtube.svg'; +import gcslogo from '../../assets/images/gcs.webp'; +import s3logo from '../../assets/images/s3logo.png'; + +const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { + const themeUtils = useContext(ThemeWrapperContext); + return ( + <> + {loading ? ( + + + + ) : mode === 'entity search+vector' && chunks?.length ? ( +
      + {chunks + .map((c) => ({ fileName: c.fileName, fileSource: c.fileSource })) + .map((s, index) => { + return ( +
    • +
      + {s.fileSource === 'local file' ? ( + + ) : ( + + )} + + {s.fileName} + +
      +
    • + ); + })} +
    + ) : sources?.length ? ( +
      + {sources.map((link, index) => { + return ( +
    • + {link?.startsWith('http') || link?.startsWith('https') ? ( + <> + {isAllowedHost(link, ['wikipedia.org']) && ( +
      + Wikipedia Logo + + + + {link} + + + +
      + )} + {isAllowedHost(link, ['storage.googleapis.com']) && ( +
      + Google Cloud Storage Logo + + {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} + +
      + )} + {youtubeLinkValidation(link) && ( + <> +
      + + + + + {link} + + + +
      + + )} + {!link?.startsWith('s3://') && + !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) && ( +
      + + + {link} + +
      + )} + + ) : link?.startsWith('s3://') ? ( +
      + S3 Logo + + {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} + +
      + ) : ( +
      + + + {link} + +
      + )} +
    • + ); + })} +
    + ) : ( + No Sources Found + )} + + ); +}; + +export default SourcesInfo; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 8bbe10f92..517ada241 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -28,10 +28,11 @@ import FallBackDialog from './UI/FallBackDialog'; import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import GraphEnhancementDialog from './Popups/GraphEnhancementDialog'; import { tokens } from '@neo4j-ndl/base'; +import axios from 'axios'; +import DatabaseStatusIcon from './UI/DatabaseStatusIcon'; import RetryConfirmationDialog from './Popups/RetryConfirmation/Index'; import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; -import axios from 'axios'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -58,7 +59,8 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); - const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus } = useCredentials(); + const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus, isGdsActive, setGdsActive } = + useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -109,10 +111,13 @@ const Content: React.FC = ({ setUserCredentials({ uri: neo4jConnection.uri, userName: neo4jConnection.user, - password: neo4jConnection.password, + password: atob(neo4jConnection.password), database: neo4jConnection.database, port: neo4jConnection.uri.split(':')[2], }); + if (neo4jConnection.isgdsActive !== undefined) { + setGdsActive(neo4jConnection.isgdsActive); 
+ } } else { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } @@ -140,7 +145,12 @@ const Content: React.FC = ({ if (processedCount == batchSize) { handleGenerateGraph([], true); } - }, [processedCount, userCredentials]); + if (processedCount === 1 && queue.isEmpty()) { + (async () => { + await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + })(); + } + }, [processedCount, userCredentials, queue]); useEffect(() => { if (afterFirstRender) { @@ -162,13 +172,19 @@ const Content: React.FC = ({ (async () => { const parsedData = JSON.parse(connection); console.log(parsedData.uri); - const response = await connectAPI(parsedData.uri, parsedData.user, parsedData.password, parsedData.database); + const response = await connectAPI( + parsedData.uri, + parsedData.user, + atob(parsedData.password), + parsedData.database + ); if (response?.data?.status === 'Success') { localStorage.setItem( 'neo4j.connection', JSON.stringify({ ...parsedData, userDbVectorIndex: response.data.data.db_vector_dimension, + password: btoa(atob(parsedData.password)), }) ); if ( @@ -356,7 +372,10 @@ const Content: React.FC = ({ return data; }; - const addFilesToQueue = (remainingFiles: CustomFile[]) => { + const addFilesToQueue = async (remainingFiles: CustomFile[]) => { + if (!remainingFiles.length) { + await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; setFilesData((prev) => @@ -434,13 +453,11 @@ const Content: React.FC = ({ } Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); } else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) { data = scheduleBatchWiseProcess(queue.items, true); Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, 
postProcessingTasks); }); } else { addFilesToQueue(filesTobeProcessed as CustomFile[]); @@ -460,7 +477,6 @@ const Content: React.FC = ({ } Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); }); } else { const selectedNewFiles = childRef.current @@ -739,16 +755,14 @@ const Content: React.FC = ({ chunksExistsWithDifferentEmbedding={openConnection.chunksExistsWithDifferentDimension} /> -
    Neo4j connection - {!connectionStatus ? : } - {connectionStatus ? ( - {userCredentials?.uri} - ) : ( - Not Connected - )} +
    {!isSchema ? ( diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index 7ff5d90d2..0278e679e 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -7,7 +7,7 @@ import { useFileContext } from '../../../context/UsersFiles'; import { CustomFile, CustomFileBase, UserCredentials } from '../../../types'; import { buttonCaptions, chunkSize } from '../../../utils/Constants'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from '../../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../../UI/IconButtonToolTip'; import { uploadAPI } from '../../../utils/FileAPI'; import { showErrorToast, showSuccessToast } from '../../../utils/toasts'; diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 21bd47240..17c97d0bc 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -142,7 +142,7 @@ export default function DropZoneForSmallLayouts() { uploadNextChunk(); }; - const { acceptedFiles, getRootProps, getInputProps } = useDropzone({ + const { getRootProps, getInputProps } = useDropzone({ accept: { 'application/pdf': ['.pdf'], 'image/*': ['.jpeg', '.jpg', '.png', '.svg'], @@ -214,7 +214,6 @@ export default function DropZoneForSmallLayouts() { setFilesData(copiedFilesData); } }; - console.log(acceptedFiles); return ( <>
    diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index aa4d9bb9a..2d0d690aa 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -43,7 +43,7 @@ import useServerSideEvent from '../hooks/useSse'; import { AxiosError } from 'axios'; import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; import cancelAPI from '../services/CancelAPI'; -import IconButtonWithToolTip from './UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index 7e3096daa..b335a4324 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -1,21 +1,31 @@ import { Checkbox } from '@neo4j-ndl/react'; import React from 'react'; import { CheckboxSectionProps } from '../../types'; -const CheckboxSelection: React.FC = ({ graphType, loading, handleChange }) => ( +import { graphLabels } from '../../utils/Constants'; + +const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds }) => (
    handleChange('DocumentChunk')} /> handleChange('Entities')} /> + {isgds && ( + handleChange('Communities')} + /> + )}
    ); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 4125c0d12..50c381685 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -30,7 +30,7 @@ import { MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { filterData, processGraphData, sortAlphabetically } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; import { LegendsChip } from './LegendsChip'; @@ -58,17 +58,17 @@ const GraphViewModal: React.FunctionComponent = ({ const nvlRef = useRef(null); const [nodes, setNodes] = useState([]); const [relationships, setRelationships] = useState([]); - const [graphType, setGraphType] = useState(intitalGraphType); const [allNodes, setAllNodes] = useState([]); const [allRelationships, setAllRelationships] = useState([]); const [loading, setLoading] = useState(false); const [status, setStatus] = useState<'unknown' | 'success' | 'danger'>('unknown'); const [statusMessage, setStatusMessage] = useState(''); - const { userCredentials } = useCredentials(); + const { userCredentials, isGdsActive } = useCredentials(); const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); const debouncedQuery = useDebounce(searchQuery, 300); + const [graphType, setGraphType] = useState(intitalGraphType(isGdsActive)); const [disableRefresh, setDisableRefresh] = useState(false); // the checkbox selection @@ -120,7 +120,7 @@ const GraphViewModal: React.FunctionComponent = ({ if (nvlRef.current) { nvlRef.current?.destroy(); } - setGraphType(intitalGraphType); + setGraphType(intitalGraphType(isGdsActive)); clearTimeout(timeoutId); setScheme({}); setNodes([]); @@ 
-182,6 +182,7 @@ const GraphViewModal: React.FunctionComponent = ({ useEffect(() => { if (open) { setLoading(true); + setGraphType(intitalGraphType(isGdsActive)); if (viewPoint !== 'chatInfoView') { graphApi(); } else { @@ -195,7 +196,31 @@ const GraphViewModal: React.FunctionComponent = ({ setLoading(false); } } - }, [open]); + }, [open, isGdsActive]); + + useEffect(() => { + handleSearch(debouncedQuery); + }, [debouncedQuery]); + + const initGraph = ( + graphType: GraphType[], + finalNodes: ExtendedNode[], + finalRels: Relationship[], + schemeVal: Scheme + ) => { + if (allNodes.length > 0 && allRelationships.length > 0) { + const { filteredNodes, filteredRelations, filteredScheme } = filterData( + graphType, + finalNodes ?? [], + finalRels ?? [], + schemeVal, + isGdsActive + ); + setNodes(filteredNodes); + setRelationships(filteredRelations); + setNewScheme(filteredScheme); + } + }; // The search and update nodes const handleSearch = useCallback( @@ -239,29 +264,6 @@ const GraphViewModal: React.FunctionComponent = ({ [nodes] ); - useEffect(() => { - handleSearch(debouncedQuery); - }, [debouncedQuery]); - - const initGraph = ( - graphType: GraphType[], - finalNodes: ExtendedNode[], - finalRels: Relationship[], - schemeVal: Scheme - ) => { - if (allNodes.length > 0 && allRelationships.length > 0) { - const { filteredNodes, filteredRelations, filteredScheme } = filterData( - graphType, - finalNodes ?? [], - finalRels ?? 
[], - schemeVal - ); - setNodes(filteredNodes); - setRelationships(filteredRelations); - setNewScheme(filteredScheme); - } - }; - // Unmounting the component if (!open) { return <>; @@ -308,7 +310,7 @@ const GraphViewModal: React.FunctionComponent = ({ setStatusMessage(''); setGraphViewOpen(false); setScheme({}); - setGraphType(intitalGraphType); + setGraphType(intitalGraphType(isGdsActive)); setNodes([]); setRelationships([]); setAllNodes([]); @@ -397,6 +399,7 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(updatedNodes); }; + // const isCommunity = allNodes.some(n=>n.labels.includes('__Community__')); return ( <> = ({ {headerTitle} {checkBoxView && ( - + n.labels.includes('__Community__'))} + /> )} @@ -430,7 +438,7 @@ const GraphViewModal: React.FunctionComponent = ({
    ) : nodes.length === 0 && relationships.length === 0 && graphType.length !== 0 ? (
    - +
    ) : graphType.length === 0 ? (
    diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 2ab9c7b40..832150fad 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,6 +1,15 @@ import { LegendChipProps } from '../../types'; +import { graphLabels } from '../../utils/Constants'; import Legend from '../UI/Legend'; export const LegendsChip: React.FunctionComponent = ({ scheme, label, type, count, onClick }) => { - return ; + return ( + + ); }; diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 345a004fd..acf62eefa 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -8,7 +8,7 @@ import { } from '@neo4j-ndl/react/icons'; import { Typography } from '@neo4j-ndl/react'; import { useCallback, useEffect } from 'react'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; import { useFileContext } from '../../context/UsersFiles'; diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 89763737d..bdc43c8be 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -17,7 +17,7 @@ import ExpandedChatButtonContainer from '../ChatBot/ExpandedChatButtonContainer' import { APP_SOURCES, tooltips } from '../../utils/Constants'; import ChatModeToggle from '../ChatBot/ChatModeToggle'; import { RiChatSettingsLine } from 'react-icons/ri'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import GCSButton from '../DataSources/GCS/GCSButton'; import S3Component from '../DataSources/AWS/S3Bucket'; import WebButton from '../DataSources/Web/WebButton'; @@ -203,6 +203,7 @@ const SideNav: React.FC = ({ 
{!isChatModalOpen && ( { setchatModeAnchor(e.currentTarget); setshowChatMode(true); @@ -217,6 +218,7 @@ const SideNav: React.FC = ({ closeHandler={() => setshowChatMode(false)} menuAnchor={chatModeAnchor} disableBackdrop={true} + anchorPortal={true} > } diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 8274e626a..0213e9502 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -41,7 +41,7 @@ export default function ConnectionModal({ const [username, setUsername] = useState(initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); @@ -84,7 +84,7 @@ export default function ConnectionModal({ JSON.stringify({ uri: usercredential?.uri, user: usercredential?.userName, - password: usercredential?.password, + password: btoa(usercredential?.password), database: usercredential?.database, userDbVectorIndex: 384, }) @@ -189,6 +189,7 @@ export default function ConnectionModal({ setMessage({ type: 'danger', content: 'Please drop a valid file' }); } } catch (err: any) { + console.log({ err }); setMessage({ type: 'danger', content: err.message }); } } @@ -206,14 +207,17 @@ export default function ConnectionModal({ if (response?.data?.status !== 'Success') { throw new Error(response.data.error); } else { + const isgdsActive = response.data.data.gds_status; + setGdsActive(isgdsActive); localStorage.setItem( 'neo4j.connection', JSON.stringify({ uri: connectionURI, user: username, - password: password, + password: btoa(password), database: database, userDbVectorIndex, + isgdsActive, }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); @@ -259,6 +263,7 @@ export default function ConnectionModal({ } } } catch (error) { + console.log({ error }); setIsLoading(false); if (error instanceof Error) { setMessage({ type: 'danger', content: error.message }); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 06e76ff8e..28114ced7 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -3,11 +3,12 @@ import { POST_PROCESSING_JOBS } from '../../../../utils/Constants'; import { capitalize } from '../../../../utils/Utils'; import { useFileContext } from '../../../../context/UsersFiles'; import { tokens } from '@neo4j-ndl/base'; - +import { useCredentials } from '../../../../context/UserCredentials'; export default 
function PostProcessingCheckList() { const { breakpoints } = tokens; const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const { postProcessingTasks, setPostProcessingTasks } = useFileContext(); + const { isGdsActive } = useCredentials(); return (
    @@ -18,29 +19,38 @@ export default function PostProcessingCheckList() { - {POST_PROCESSING_JOBS.map((job, idx) => ( - - - {job.title - .split('_') - .map((s) => capitalize(s)) - .join(' ')} - - } - checked={postProcessingTasks.includes(job.title)} - onChange={(e) => { - if (e.target.checked) { - setPostProcessingTasks((prev) => [...prev, job.title]); - } else { - setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); + {POST_PROCESSING_JOBS.map((job, idx) => { + const isCreateCommunities = job.title === 'create_communities'; + return ( + + + {job.title + .split('_') + .map((s) => capitalize(s)) + .join(' ')} + } - }} - > - {job.description} - - ))} + checked={ + isCreateCommunities + ? isGdsActive && postProcessingTasks.includes(job.title) + : postProcessingTasks.includes(job.title) + } + onChange={(e) => { + if (e.target.checked) { + setPostProcessingTasks((prev) => [...prev, job.title]); + } else { + setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); + } + }} + disabled={isCreateCommunities && !isGdsActive} + aria-label='checkbox-postProcessing' + /> + {job.description} + + ); + })}
    diff --git a/frontend/src/components/Popups/Settings/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx index 8ebc15020..914d9580c 100644 --- a/frontend/src/components/Popups/Settings/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -27,7 +27,6 @@ const SchemaFromTextDialog = ({ try { setloading(true); const response = await getNodeLabelsAndRelTypesFromText(model, userText, isSchema); - console.log({ response }); setloading(false); if (response.data.status === 'Success') { if (response.data?.data?.labels.length) { diff --git a/frontend/src/components/UI/DatabaseIcon.tsx b/frontend/src/components/UI/DatabaseIcon.tsx new file mode 100644 index 000000000..ae291e401 --- /dev/null +++ b/frontend/src/components/UI/DatabaseIcon.tsx @@ -0,0 +1,25 @@ +import { CircleStackIconOutline, ScienceMoleculeIcon } from '@neo4j-ndl/react/icons'; +import { IconWithToolTip } from './IconButtonToolTip'; +import { DatabaseStatusProps } from '../../types'; +import { connectionLabels } from '../../utils/Constants'; + +const DatabaseStatusIcon: React.FC = ({ isConnected, isGdsActive, uri }) => { + const iconStyle = { fill: 'none', stroke: isConnected ? connectionLabels.greenStroke : connectionLabels.redStroke }; + const text = isGdsActive ? connectionLabels.graphDataScience : connectionLabels.graphDatabase; + return ( +
    + + + {isGdsActive ? ( + + ) : ( + + )} + + + {isConnected ? uri : connectionLabels.notConnected} +
    + ); +}; + +export default DatabaseStatusIcon; diff --git a/frontend/src/components/UI/DatabaseStatusIcon.tsx b/frontend/src/components/UI/DatabaseStatusIcon.tsx new file mode 100644 index 000000000..7b159607e --- /dev/null +++ b/frontend/src/components/UI/DatabaseStatusIcon.tsx @@ -0,0 +1,26 @@ +import { CircleStackIconOutline } from '@neo4j-ndl/react/icons'; +import { IconWithToolTip } from './IconButtonToolTip'; +import { DatabaseStatusProps } from '../../types'; +import { connectionLabels } from '../../utils/Constants'; +import ScienceMoleculeIcon from '../UI/ScienceMolecule'; + +const DatabaseStatusIcon: React.FC = ({ isConnected, isGdsActive, uri }) => { + const strokeColour = isConnected ? connectionLabels.greenStroke : connectionLabels.redStroke; + const text = isGdsActive ? connectionLabels.graphDataScience : connectionLabels.graphDatabase; + return ( +
    + + + {isGdsActive ? ( + + ) : ( + + )} + + + {isConnected ? uri : connectionLabels.notConnected} +
    + ); +}; + +export default DatabaseStatusIcon; diff --git a/frontend/src/components/UI/IconButtonToolTip.tsx b/frontend/src/components/UI/IconButtonToolTip.tsx index 9622fd8e2..5e7ecb47b 100644 --- a/frontend/src/components/UI/IconButtonToolTip.tsx +++ b/frontend/src/components/UI/IconButtonToolTip.tsx @@ -1,7 +1,7 @@ import { IconButton, Tip } from '@neo4j-ndl/react'; import { useState } from 'react'; -const IconButtonWithToolTip = ({ +export const IconButtonWithToolTip = ({ text, children, onClick, @@ -48,4 +48,27 @@ const IconButtonWithToolTip = ({ ); }; -export default IconButtonWithToolTip; +export const IconWithToolTip = ({ + text, + children, + placement = 'bottom', +}: { + label: string; + text: string | React.ReactNode; + children: React.ReactNode; + onClick?: React.MouseEventHandler | undefined; + size?: 'small' | 'medium' | 'large'; + clean?: boolean; + grouped?: boolean; + placement?: 'bottom' | 'top' | 'right' | 'left'; + disabled?: boolean; +}) => { + return ( + + {children} + + {text} + + + ); +}; diff --git a/frontend/src/components/UI/Menu.tsx b/frontend/src/components/UI/Menu.tsx index 4cfd1e238..62cc25095 100644 --- a/frontend/src/components/UI/Menu.tsx +++ b/frontend/src/components/UI/Menu.tsx @@ -1,6 +1,5 @@ import { Menu } from '@neo4j-ndl/react'; import { Menuitems, Origin } from '../../types'; - export default function CustomMenu({ open, closeHandler, @@ -23,25 +22,29 @@ export default function CustomMenu({ return ( { + closeHandler(); + }} anchorOrigin={anchorOrigin} transformOrigin={transformOrigin} anchorPortal={anchorPortal} anchorEl={MenuAnchor} disableBackdrop={disableBackdrop} + className='custom-menu' > - {items?.map((i, idx) => { - return ( - - ); - })} + {items?.map((i, idx) => ( + { + i.onClick(); + closeHandler(); + }} + disabled={i.disabledCondition} + className={i.isSelected ? 
i.selectedClassName : ''} + description={i.description} + /> + ))} ); } diff --git a/frontend/src/components/UI/ScienceMolecule.tsx b/frontend/src/components/UI/ScienceMolecule.tsx new file mode 100644 index 000000000..6cd046f14 --- /dev/null +++ b/frontend/src/components/UI/ScienceMolecule.tsx @@ -0,0 +1,40 @@ +import React from 'react'; + +interface ScienceMoleculeIconProps { + currentColour: string; +} + +const ScienceMoleculeIcon: React.FC = ({ currentColour }) => ( + + + + + +); + +export default ScienceMoleculeIcon; diff --git a/frontend/src/components/WebSources/GenericSourceButton.tsx b/frontend/src/components/WebSources/GenericSourceButton.tsx index 65c8b0046..96aab02ff 100644 --- a/frontend/src/components/WebSources/GenericSourceButton.tsx +++ b/frontend/src/components/WebSources/GenericSourceButton.tsx @@ -1,6 +1,6 @@ import { DataComponentProps } from '../../types'; import { Flex, Typography } from '@neo4j-ndl/react'; -import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; import { APP_SOURCES } from '../../utils/Constants'; import WebButton from '../DataSources/Web/WebButton'; diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index af13ea371..1b5eda2da 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -8,6 +8,8 @@ type Props = { export const UserConnection = createContext({ userCredentials: null, setUserCredentials: () => null, + isGdsActive: false, + setGdsActive: () => false, connectionStatus: false, setConnectionStatus: () => null, }); @@ -17,13 +19,17 @@ export const useCredentials = () => { }; const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); + const [isGdsActive, setGdsActive] = useState(false); const [connectionStatus, 
setConnectionStatus] = useReducer((state) => !state, false); const value = { userCredentials, setUserCredentials, + isGdsActive, + setGdsActive, connectionStatus, setConnectionStatus, }; + return {props.children}; }; export default UserCredentialsWrapper; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index fd3531403..fbab1b719 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -1,6 +1,6 @@ import { createContext, useContext, useState, Dispatch, SetStateAction, FC, useEffect } from 'react'; import { CustomFile, FileContextProviderProps, OptionType } from '../types'; -import { defaultLLM } from '../utils/Constants'; +import { chatModeLables, defaultLLM } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; interface showTextFromSchemaDialogType { @@ -58,7 +58,7 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); - const [chatMode, setchatMode] = useState('graph+vector+fulltext'); + const [chatMode, setchatMode] = useState(chatModeLables.graph_vector_fulltext); const [isSchema, setIsSchema] = useState(false); const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({ triggeredFrom: '', @@ -68,6 +68,7 @@ const FileContextProvider: FC = ({ children }) => { 'materialize_text_chunk_similarities', 'enable_hybrid_search_and_fulltext_search_in_bloom', 'materialize_entity_similarities', + 'create_communities', ]); const [processedCount, setProcessedCount] = useState(0); useEffect(() => { diff --git a/frontend/src/services/CancelAPI.ts b/frontend/src/services/CancelAPI.ts index de3ea9ba0..f491f06e0 100644 --- a/frontend/src/services/CancelAPI.ts +++ b/frontend/src/services/CancelAPI.ts @@ -9,7 +9,7 @@ const cancelAPI = async (filenames: string[], 
source_types: string[]) => { formData.append('uri', credentials?.uri ?? ''); formData.append('database', credentials?.database ?? ''); formData.append('userName', credentials?.user ?? ''); - formData.append('password', credentials?.password ?? ''); + formData.append('password', atob(credentials?.password) ?? ''); } formData.append('filenames', JSON.stringify(filenames)); formData.append('source_types', JSON.stringify(source_types)); diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index aa133c815..f69ddd3aa 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,13 +1,20 @@ import { ChatInfo_APIResponse, UserCredentials } from '../types'; import api from '../API/Index'; -const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: string) => { +const chunkEntitiesAPI = async ( + userCredentials: UserCredentials, + chunk_ids: string, + database: string = 'neo4j', + is_entity: boolean = false +) => { try { const formData = new FormData(); formData.append('uri', userCredentials?.uri ?? ''); formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); formData.append('chunk_ids', chunk_ids); + formData.append('database', database); + formData.append('is_entity', String(is_entity)); const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d16f56fa1..9631335a9 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -223,6 +223,7 @@ export interface Messages { cypher_query?: string; graphonly_entities?: []; error?: string; + entities?: chunk[]; } export type ChatbotProps = { @@ -245,7 +246,7 @@ export interface GraphViewModalProps { selectedRows?: CustomFile[] | undefined; } -export type GraphType = 'Entities' | 'DocumentChunk'; +export type GraphType = 'Entities' | 'DocumentChunk' | 'Communities'; export type PartialLabelNode = Partial & { labels: string; @@ -255,6 +256,7 @@ export interface CheckboxSectionProps { graphType: GraphType[]; loading: boolean; handleChange: (graph: GraphType) => void; + isgds: boolean; } export interface fileName { @@ -463,7 +465,13 @@ export type Entity = { id: string; }; }; - +export type Community = { + id: string; + summary: string; + weight: number; + level: number; + community_rank: number; +}; export type GroupedEntity = { texts: Set; color: string; @@ -501,6 +509,7 @@ export interface Chunk { url?: string; fileSource: string; score?: string; + fileType: string; } export interface SpeechSynthesisProps { @@ -525,7 +534,7 @@ export interface SettingsModalProps { onClear?: () => void; } export interface Menuitems { - title: string; + title: string | JSX.Element; onClick: () => void; disabledCondition: boolean; description?: string | React.ReactNode; @@ -620,6 +629,8 @@ export interface DrawerChatbotProps { export interface ContextProps { userCredentials: UserCredentials | null; setUserCredentials: (UserCredentials: UserCredentials) => void; + isGdsActive: boolean; + setGdsActive: Dispatch>; connectionStatus: boolean; setConnectionStatus: Dispatch>; } @@ -627,3 
+638,33 @@ export interface MessageContextType { messages: Messages[] | []; setMessages: Dispatch>; } + +export interface DatabaseStatusProps { + isConnected: boolean; + isGdsActive: boolean; + uri: string | null; +} + +export type SourcesProps = { + loading: boolean; + mode: string; + sources: string[]; + chunks: Chunk[]; +}; + +export type ChunkProps = { + loading: boolean; + chunks: Chunk[]; +}; + +export type EntitiesProps = { + loading: boolean; + mode: string; + graphonly_entities: []; + infoEntities: Entity[]; +}; + +export type CommunitiesProps = { + loading: boolean; + communities: Community[]; +}; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 114b20fa0..0b2f60cba 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -1,5 +1,6 @@ import { NvlOptions } from '@neo4j-nvl/base'; import { GraphType, OptionType } from '../types'; +import { getDescriptionForChatMode } from './Utils'; export const document = `+ [docs]`; @@ -28,10 +29,12 @@ export const docChunkEntities = `+[chunks] + collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks //chunks with entities + collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; + export const APP_SOURCES = process.env.VITE_REACT_APP_SOURCES !== '' ? (process.env.VITE_REACT_APP_SOURCES?.split(',') as string[]) : ['gcs', 's3', 'local', 'wiki', 'youtube', 'web']; + export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) @@ -56,10 +59,60 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') : llms?.includes('gemini-1.0-pro') ? 'gemini-1.0-pro' : 'diffbot'; + +// export const chatModes = +// process.env?.VITE_CHAT_MODES?.trim() != '' +// ? 
process.env.VITE_CHAT_MODES?.split(',') +// : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; + +export const chatModeLables = { + vector: 'vector', + graph : 'graph', + graph_vector: 'graph+vector', + fulltext: 'fulltext', + graph_vector_fulltext: 'graph+vector+fulltext', + entity_vector:'entity search+vector', + unavailableChatMode: 'Chat mode is unavailable when rows are selected', + selected:'Selected' +} export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' - ? process.env.VITE_CHAT_MODES?.split(',') - : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext']; + ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) + : [ + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: + 'Translates text to Cypher queries for precise data retrieval from a graph database.' + }, + { + mode: chatModeLables.graph_vector, + description: + 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: chatModeLables.fulltext, + description: + 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables.graph_vector_fulltext, + description: + 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: chatModeLables.entity_vector, + description: + 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + ]; + export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? 
parseInt(process.env.VITE_TIME_PER_PAGE) : 50; export const timePerByte = 0.2; @@ -184,6 +237,10 @@ export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ semantic meaning. This facilitates tasks like clustering similar entities, identifying duplicates, and performing similarity-based searches.`, }, + { + title: 'create_communities', + description: 'Create Communities identifies and groups similar entities, improving search accuracy and analysis.', + }, ]; export const RETRY_OPIONS = [ 'start_from_beginning', @@ -215,7 +272,12 @@ export const graphView: OptionType[] = [ { label: 'Entity Graph', value: queryMap.Entities }, { label: 'Knowledge Graph', value: queryMap.DocChunkEntities }, ]; -export const intitalGraphType: GraphType[] = ['DocumentChunk', 'Entities']; + +export const intitalGraphType = (isGDSActive: boolean): GraphType[] => { + return isGDSActive + ? ['DocumentChunk', 'Entities', 'Communities'] // GDS is active, include communities + : ['DocumentChunk', 'Entities']; // GDS is inactive, exclude communities +}; export const appLabels = { ownSchema: 'Or Define your own Schema', @@ -237,6 +299,17 @@ export const graphLabels = { selectCheckbox: 'Select atleast one checkbox for graph view', totalRelationships: 'Total Relationships', nodeSize: 30, + docChunk: 'Document & Chunk', + community: 'Communities', + noNodesRels: 'No Nodes and No relationships', }; export const RESULT_STEP_SIZE = 25; + +export const connectionLabels = { + notConnected: 'Not Connected', + graphDataScience: 'Graph Data Science', + graphDatabase: 'Graph Database', + greenStroke: 'green', + redStroke: 'red', +}; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 8b49c7e7d..97e7bda07 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -11,6 +11,15 @@ import { SourceNode, UserCredentials, } from '../types'; +import Wikipediadarkmode from '../assets/images/wikipedia-darkmode.svg'; +import Wikipediadlogo from 
'../assets/images/wikipedia.svg'; +import webdarklogo from '../assets/images/web-darkmode.svg'; +import weblogo from '../assets/images/web.svg'; +import youtubedarklogo from '../assets/images/youtube-darkmode.svg'; +import youtubelightlogo from '../assets/images/youtube-lightmode.svg'; +import s3logo from '../assets/images/s3logo.png'; +import gcslogo from '../assets/images/gcs.webp'; +import { chatModeLables } from './Constants'; // Get the Url export const url = () => { @@ -181,18 +190,34 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela return { finalNodes, finalRels, schemeVal }; }; +/** + * Filters nodes, relationships, and scheme based on the selected graph types. + * + * @param graphType - An array of graph types to filter by (e.g., 'DocumentChunk', 'Entities', 'Communities'). + * @param allNodes - An array of all nodes present in the graph. + * @param allRelationships - An array of all relationships in the graph. + * @param scheme - The scheme object containing node and relationship information. + * @returns An object containing filtered nodes, relationships, and scheme based on the selected graph types. 
+ */ export const filterData = ( graphType: GraphType[], allNodes: ExtendedNode[], allRelationships: Relationship[], - scheme: Scheme + scheme: Scheme, + isGdsActive: boolean ) => { let filteredNodes: ExtendedNode[] = []; let filteredRelations: Relationship[] = []; let filteredScheme: Scheme = {}; - const entityTypes = Object.keys(scheme).filter((type) => type !== 'Document' && type !== 'Chunk'); - if (graphType.includes('DocumentChunk') && !graphType.includes('Entities')) { - // Document + Chunk + const entityTypes = Object.keys(scheme).filter( + (type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__' + ); + // Only Document + Chunk + if ( + graphType.includes('DocumentChunk') && + !graphType.includes('Entities') && + (!graphType.includes('Communities') || !isGdsActive) + ) { filteredNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') ); @@ -204,8 +229,12 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk }; - } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk')) { // Only Entity + } else if ( + graphType.includes('Entities') && + !graphType.includes('DocumentChunk') && + (!graphType.includes('Communities') || !isGdsActive) + ) { const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); filteredNodes = entityNodes ? 
entityNodes : []; const nodeIds = new Set(filteredNodes.map((node) => node.id)); @@ -216,15 +245,100 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; - } else if (graphType.includes('DocumentChunk') && graphType.includes('Entities')) { + // Only Communities + } else if ( + graphType.includes('Communities') && + !graphType.includes('DocumentChunk') && + !graphType.includes('Entities') && + isGdsActive + ) { + filteredNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + ['IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? '') && nodeIds.has(rel.from) && nodeIds.has(rel.to) + ); + filteredScheme = { __Community__: scheme.__Community__ }; // Document + Chunk + Entity + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Entities') && + (!graphType.includes('Communities') || !isGdsActive) + ) { + filteredNodes = allNodes.filter( + (node) => + (node.labels.includes('Document') && node.properties.fileName) || + node.labels.includes('Chunk') || + (!node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__')) + ); + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + !['IN_COMMUNITY', 'PARENT_COMMUNITY'].includes(rel.caption ?? 
'') && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { + Document: scheme.Document, + Chunk: scheme.Chunk, + ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])), + }; + // Entities + Communities + } else if ( + graphType.includes('Entities') && + graphType.includes('Communities') && + !graphType.includes('DocumentChunk') && + isGdsActive + ) { + const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); + const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + filteredNodes = [...entityNodes, ...communityNodes]; + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + !['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption ?? '') && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { + ...Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])), + __Community__: scheme.__Community__, + }; + // Document + Chunk + Communities + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Communities') && + !graphType.includes('Entities') && + isGdsActive + ) { + const documentChunkNodes = allNodes.filter( + (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') + ); + const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); + filteredNodes = [...documentChunkNodes, ...communityNodes]; + const nodeIds = new Set(filteredNodes.map((node) => node.id)); + filteredRelations = allRelationships.filter( + (rel) => + ['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK', 'IN_COMMUNITY', 'PARENT_COMMUNITY'].includes( + rel.caption ?? 
'' + ) && + nodeIds.has(rel.from) && + nodeIds.has(rel.to) + ); + filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk, __Community__: scheme.__Community__ }; + // Document + Chunk + Entity + Communities (All types) + } else if ( + graphType.includes('DocumentChunk') && + graphType.includes('Entities') && + graphType.includes('Communities') && + isGdsActive + ) { filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; } return { filteredNodes, filteredRelations, filteredScheme }; }; - export const getDateTime = () => { const date = new Date(); const formattedDateTime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; @@ -246,8 +360,11 @@ export const capitalize = (word: string): string => { }; export const parseEntity = (entity: Entity) => { const { labels, properties } = entity; - const [label] = labels; + let [label] = labels; const text = properties.id; + if (!label) { + label = 'Entity'; + } return { label, text }; }; @@ -289,3 +406,66 @@ export const sortAlphabetically = (a: Relationship, b: Relationship) => { const captionTwo = b.caption?.toLowerCase() || ''; return captionOne.localeCompare(captionTwo); }; + +export const capitalizeWithPlus = (s: string) => { + return s + .split('+') + .map((s) => capitalize(s)) + .join('+'); +}; + +export const getDescriptionForChatMode = (mode: string): string => { + switch (mode.toLowerCase()) { + case chatModeLables.vector: + return 'Utilizes vector indexing on text chunks to enable semantic similarity search.'; + case chatModeLables.graph: + return 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.'; + case chatModeLables.graph_vector: + return 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.'; + case chatModeLables.fulltext: + return 'Employs a fulltext 
index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.'; + case chatModeLables.graph_vector_fulltext: + return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.'; + case chatModeLables.entity_vector: + return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.'; + default: + return 'Chat mode description not available'; // Fallback description + } +}; +export const getLogo = (mode: string): Record => { + if (mode === 'light') { + return { + Wikipedia: Wikipediadarkmode, + 'web-url': webdarklogo, + 's3 bucket': s3logo, + youtube: youtubedarklogo, + 'gcs bucket': gcslogo, + }; + } + return { + Wikipedia: Wikipediadlogo, + 'web-url': weblogo, + 's3 bucket': s3logo, + youtube: youtubelightlogo, + 'gcs bucket': gcslogo, + }; +}; + +export const generateYouTubeLink = (url: string, startTime: string) => { + try { + const urlObj = new URL(url); + urlObj.searchParams.set('t', startTime); + return urlObj.toString(); + } catch (error) { + console.error('Invalid URL:', error); + return ''; + } +}; +export function isAllowedHost(url: string, allowedHosts: string[]) { + try { + const parsedUrl = new URL(url); + return allowedHosts.includes(parsedUrl.host); + } catch (e) { + return false; + } +} From cc62f5081c8e2e33e391e729ecaf9f86530c600d Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 20 Sep 2024 06:23:27 +0000 Subject: [PATCH 114/292] added global env for communities --- backend/src/graphDB_dataAccess.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 279451bdf..643902b16 100644 --- 
a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -7,6 +7,9 @@ from src.shared.constants import BUCKET_UPLOAD from src.entities.source_node import sourceNode import json +from dotenv import load_dotenv + +load_dotenv() class graphDBdataAccess: @@ -152,11 +155,14 @@ def check_gds_version(self): result = self.graph.query(gds_procedure_count) total_gds_procedures = result[0]['totalGdsProcedures'] if result else 0 - if total_gds_procedures > 0: + enable_communities = os.environ.get('ENABLE_COMMUNITIES','').upper() == "TRUE" + logging.info(f"Enable Communities {enable_communities}") + + if enable_communities and total_gds_procedures > 0: logging.info("GDS is available in the database.") return True else: - logging.info("GDS is not available in the database.") + logging.info("Communities are disabled or GDS is not available in the database.") return False except Exception as e: logging.error(f"An error occurred while checking GDS version: {e}") From 3732c381fcaf11c953afc0b2cf9ce2e2e1332444 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 20 Sep 2024 06:44:19 +0000 Subject: [PATCH 115/292] comment all security header --- backend/score.py | 5 +++++ backend/src/main.py | 16 ++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/backend/score.py b/backend/score.py index e87fc618e..227ff2901 100644 --- a/backend/score.py +++ b/backend/score.py @@ -48,6 +48,11 @@ def sick(): return False app = FastAPI() +# SecWeb(app=app, Option={'referrer': False, 'xframe': False}) +# app.add_middleware(HSTS, Option={'max-age': 4}) +# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +# app.add_middleware(XContentTypeOptions) +# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware( CORSMiddleware, diff --git 
a/backend/src/main.py b/backend/src/main.py index 8bae03809..49906aea8 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -142,20 +142,20 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) logging.info(f"match value: {match}") - cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") - print(f'Credential file path {cred_path}') - if os.path.exists(cred_path): + file_path = os.path.join(os.path.dirname(__file__),"llm-experiments_credentials.json") + logging.info(f'Credential file path {file_path}') + if os.path.exists(file_path): logging.info("File path exist") - with open(cred_path,'r') as secret_file: - secret_data = json.load(secret_file) - logging.info(f"Project id : {secret_data['project_id']}") - logging.info(f"Universal domain: {secret_data['universe_domain']}") + with open(file_path,'r') as file: + data = json.load(file) + logging.info(f"Project id : {data['project_id']}") + logging.info(f"Universal domain: {data['universe_domain']}") else: logging.warning("credntial file path not exist") video_id = parse_qs(urlparse(youtube_url).query).get('v') print(f'Video Id Youtube: {video_id}') - google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) + google_api_client = GoogleApiClient(service_account_path=Path(file_path)) youtube_loader_channel = GoogleApiYoutubeLoader( google_api_client=google_api_client, video_ids=[video_id[0].strip()], add_video_info=True From 763dedf7c0e11c7bccb77bc7ae9f850f586202f7 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:59:18 +0530 Subject: [PATCH 116/292] added threading to chat summarization to improve chat response time (#751) --- backend/src/QA_integration.py | 44 ++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff 
--git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 7ac99f66b..651440437 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -1,11 +1,14 @@ import os import json -from datetime import datetime import time -from typing import Any +import logging +import threading +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime +from typing import Any from dotenv import load_dotenv -import logging + # LangChain imports from langchain_community.vectorstores.neo4j_vector import Neo4jVector @@ -364,22 +367,26 @@ def setup_chat(model, graph, document_names, chat_mode_settings): return llm, doc_retriever, model_name -def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings): +def process_chat_response(messages, history, question, model, graph, document_names, chat_mode_settings): try: - llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings) + llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings) docs = retrieve_documents(doc_retriever, messages) if docs: - content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings) + content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings) else: content = "I couldn't find any relevant documents to answer your question." 
- result = {"sources": [], "chunkdetails": [],"entities":[]} + result = {"sources": [], "chunkdetails": [], "entities": []} total_tokens = 0 ai_response = AIMessage(content=content) messages.append(ai_response) - summarize_and_log(history, messages, llm) - + + summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, llm)) + summarization_thread.start() + logging.info("Summarization thread started.") + # summarize_and_log(history, messages, llm) + return { "session_id": "", "message": content, @@ -390,7 +397,7 @@ def process_chat_response(messages,history, question, model, graph, document_nam "total_tokens": total_tokens, "response_time": 0, "mode": chat_mode_settings["mode"], - "entities" : result["entities"] + "entities": result["entities"] }, "user": "chatbot" } @@ -413,6 +420,7 @@ def process_chat_response(messages,history, question, model, graph, document_nam } def summarize_and_log(history, stored_messages, llm): + logging.info("Starting summarization in a separate thread.") if not stored_messages: logging.info("No messages to summarize.") return False @@ -433,9 +441,10 @@ def summarize_and_log(history, stored_messages, llm): summary_message = summarization_chain.invoke({"chat_history": stored_messages}) - history.clear() - history.add_user_message("Our current conversation summary till now") - history.add_message(summary_message) + with threading.Lock(): + history.clear() + history.add_user_message("Our current conversation summary till now") + history.add_message(summary_message) history_summarized_time = time.time() - start_time logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") @@ -443,8 +452,8 @@ def summarize_and_log(history, stored_messages, llm): return True except Exception as e: - logging.error(f"An error occurred while summarizing messages: {e}") - return False + logging.error(f"An error occurred while summarizing messages: {e}", exc_info=True) + return False def 
create_graph_chain(model, graph): try: @@ -501,7 +510,10 @@ def process_graph_response(model, graph, question, messages, history): ai_response = AIMessage(content=ai_response_content) messages.append(ai_response) - summarize_and_log(history, messages, qa_llm) + # summarize_and_log(history, messages, qa_llm) + summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, qa_llm)) + summarization_thread.start() + logging.info("Summarization thread started.") result = { "session_id": "", From 27272f479e025417dc98348b20d2df077f46b991 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:59:39 +0530 Subject: [PATCH 117/292] formatted the queries and added logic for empty label (#752) --- backend/src/shared/constants.py | 190 ++++++++++++++++++++++++-------- 1 file changed, 141 insertions(+), 49 deletions(-) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 7856dbec2..bbf579520 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -235,51 +235,97 @@ LOCAL_COMMUNITY_TOP_OUTSIDE_RELS = 10 LOCAL_COMMUNITY_SEARCH_QUERY = """ -WITH collect(node) as nodes, avg(score) as score, collect({{id: elementId(node), score:score}}) as metadata -WITH score, nodes,metadata, -collect {{ - UNWIND nodes as n - MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) - WITH c, count(distinct n) as freq - RETURN c - ORDER BY freq DESC - LIMIT {topChunks} -}} AS chunks, -collect {{ - UNWIND nodes as n - MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) - WITH c, c.community_rank as rank, c.weight AS weight - RETURN c - ORDER BY rank, weight DESC - LIMIT {topCommunities} -}} AS communities, -collect {{ - unwind nodes as n - unwind nodes as m - match (n)-[r]->(m) - RETURN distinct r - // todo should we have a limit here? 
-}} as rels, -collect {{ - unwind nodes as n - match path = (n)-[r]-(m:__Entity__) - where not m in nodes - with m, collect(distinct r) as rels, count(*) as freq - ORDER BY freq DESC LIMIT {topOutsideRels} - WITH collect(m) as outsideNodes, apoc.coll.flatten(collect(rels)) as rels - RETURN {{ nodes: outsideNodes, rels: rels }} -}} as outside +WITH collect(node) AS nodes, + avg(score) AS score, + collect({{id: elementId(node), score: score}}) AS metadata + +WITH score, nodes, metadata, + + collect {{ + UNWIND nodes AS n + MATCH (n)<-[:HAS_ENTITY]->(c:Chunk) + WITH c, count(distinct n) AS freq + RETURN c + ORDER BY freq DESC + LIMIT {topChunks} + }} AS chunks, + + collect {{ + UNWIND nodes AS n + MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) + WITH c, c.community_rank AS rank, c.weight AS weight + RETURN c + ORDER BY rank, weight DESC + LIMIT {topCommunities} + }} AS communities, + + collect {{ + UNWIND nodes AS n + UNWIND nodes AS m + MATCH (n)-[r]->(m) + RETURN DISTINCT r + // TODO: need to add limit + }} AS rels, + + collect {{ + UNWIND nodes AS n + MATCH path = (n)-[r]-(m:__Entity__) + WHERE NOT m IN nodes + WITH m, collect(distinct r) AS rels, count(*) AS freq + ORDER BY freq DESC + LIMIT {topOutsideRels} + WITH collect(m) AS outsideNodes, apoc.coll.flatten(collect(rels)) AS rels + RETURN {{ nodes: outsideNodes, rels: rels }} + }} AS outside """ LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX = """ -RETURN {chunks: [c in chunks | c.text], - communities: [c in communities | c.summary], - entities: [n in nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+ n.id + " " + coalesce(n.description, "")], - relationships: [r in rels | startNode(r).id+" "+type(r)+" "+endNode(r).id], - outside: { - nodes: [n in outside[0].nodes | apoc.coll.removeAll(labels(n),["__Entity__"])[0] + ":"+n.id + " " + coalesce(n.description, "")], - relationships: [r in outside[0].rels | apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+startNode(r).id+" "+type(r)+" 
"+apoc.coll.removeAll(labels(startNode(r)),["__Entity__"])[0] + ":"+endNode(r).id]} - } AS text, score, {entities:metadata} as metadata +RETURN { + chunks: [c IN chunks | c.text], + communities: [c IN communities | c.summary], + entities: [ + n IN nodes | + CASE + WHEN size(labels(n)) > 1 THEN + apoc.coll.removeAll(labels(n), ["__Entity__"])[0] + ":" + n.id + " " + coalesce(n.description, "") + ELSE + n.id + " " + coalesce(n.description, "") + END + ], + relationships: [ + r IN rels | + startNode(r).id + " " + type(r) + " " + endNode(r).id + ], + outside: { + nodes: [ + n IN outside[0].nodes | + CASE + WHEN size(labels(n)) > 1 THEN + apoc.coll.removeAll(labels(n), ["__Entity__"])[0] + ":" + n.id + " " + coalesce(n.description, "") + ELSE + n.id + " " + coalesce(n.description, "") + END + ], + relationships: [ + r IN outside[0].rels | + CASE + WHEN size(labels(startNode(r))) > 1 THEN + apoc.coll.removeAll(labels(startNode(r)), ["__Entity__"])[0] + ":" + startNode(r).id + " " + ELSE + startNode(r).id + " " + END + + type(r) + " " + + CASE + WHEN size(labels(endNode(r))) > 1 THEN + apoc.coll.removeAll(labels(endNode(r)), ["__Entity__"])[0] + ":" + endNode(r).id + ELSE + endNode(r).id + END + ] + } +} AS text, +score, +{entities: metadata} AS metadata """ LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX = """ @@ -289,14 +335,60 @@ """ LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX = """ WITH * -UNWIND chunks as c +UNWIND chunks AS c MATCH (c)-[:PART_OF]->(d:Document) -RETURN [c {.*, embedding:null, fileName:d.fileName, fileSource:d.fileSource}] as chunks, -[community in communities | community {.*, embedding:null}] as communities, -[node in nodes+outside[0].nodes | {element_id:elementId(node), labels:labels(node), properties:{id:node.id,description:node.description}}] as nodes, -[r in rels+outside[0].rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, - 
endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, - relationship: {type:type(r), element_id:elementId(r)}}] as entities +RETURN + [ + c { + .*, + embedding: null, + fileName: d.fileName, + fileSource: d.fileSource + } + ] AS chunks, + [ + community IN communities | + community { + .*, + embedding: null + } + ] AS communities, + [ + node IN nodes + outside[0].nodes | + { + element_id: elementId(node), + labels: labels(node), + properties: { + id: node.id, + description: node.description + } + } + ] AS nodes, + [ + r IN rels + outside[0].rels | + { + startNode: { + element_id: elementId(startNode(r)), + labels: labels(startNode(r)), + properties: { + id: startNode(r).id, + description: startNode(r).description + } + }, + endNode: { + element_id: elementId(endNode(r)), + labels: labels(endNode(r)), + properties: { + id: endNode(r).id, + description: endNode(r).description + } + }, + relationship: { + type: type(r), + element_id: elementId(r) + } + } + ] AS entities """ CHAT_MODE_CONFIG_MAP= { From 00730d30163842338f1398003f7601f0704ff971 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 20 Sep 2024 09:49:54 +0000 Subject: [PATCH 118/292] Commented youtube google api code --- backend/score.py | 2 ++ backend/src/main.py | 43 ++++++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/backend/score.py b/backend/score.py index 227ff2901..63a26ec5b 100644 --- a/backend/score.py +++ b/backend/score.py @@ -121,6 +121,8 @@ async def create_source_knowledge_graph_url( except Exception as e: error_message = str(e) message = f" Unable to create source node for source type: {source_type} and source: {source}" + json_obj = {'error_message':error_message, 'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 
'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(json_obj, "ERROR") logging.exception(f'Exception Stack trace:') return create_api_response('Failed',message=message + error_message[:80],error=error_message,file_source=source_type) finally: diff --git a/backend/src/main.py b/backend/src/main.py index 49906aea8..4f2dc23d2 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -142,35 +142,36 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) logging.info(f"match value: {match}") - file_path = os.path.join(os.path.dirname(__file__),"llm-experiments_credentials.json") - logging.info(f'Credential file path {file_path}') - if os.path.exists(file_path): - logging.info("File path exist") - with open(file_path,'r') as file: - data = json.load(file) - logging.info(f"Project id : {data['project_id']}") - logging.info(f"Universal domain: {data['universe_domain']}") - else: - logging.warning("credntial file path not exist") + # file_path = os.path.join(os.path.dirname(__file__),"llm-experiments_credentials.json") + # logging.info(f'file path {file_path}') + + # if os.path.exists(file_path): + # logging.info("File path exist") + # with open(file_path,'r') as file: + # data = json.load(file) + # # logging.info(f"Project id : {data['project_id']}") + # # logging.info(f"Universal domain: {data['universe_domain']}") + # else: + # logging.warning("credntial file path not exist") video_id = parse_qs(urlparse(youtube_url).query).get('v') print(f'Video Id Youtube: {video_id}') - google_api_client = GoogleApiClient(service_account_path=Path(file_path)) - youtube_loader_channel = GoogleApiYoutubeLoader( - google_api_client=google_api_client, - video_ids=[video_id[0].strip()], add_video_info=True - ) - youtube_transcript = youtube_loader_channel.load() - page_content = youtube_transcript[0].page_content + # 
google_api_client = GoogleApiClient(service_account_path=Path(file_path)) + # youtube_loader_channel = GoogleApiYoutubeLoader( + # google_api_client=google_api_client, + # video_ids=[video_id[0].strip()], add_video_info=True + # ) + # youtube_transcript = youtube_loader_channel.load() + # page_content = youtube_transcript[0].page_content obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - # transcript= get_youtube_combined_transcript(match.group(1)) - # print(transcript) - if page_content==None or len(page_content)==0: + transcript= get_youtube_combined_transcript(match.group(1)) + print(transcript) + if transcript==None or len(transcript)==0: message = f"Youtube transcript is not available for : {obj_source_node.file_name}" raise Exception(message) else: - obj_source_node.file_size = sys.getsizeof(page_content) + obj_source_node.file_size = sys.getsizeof(transcript) graphDb_data_Access = graphDBdataAccess(graph) graphDb_data_Access.create_source_node(obj_source_node) From 48c9f20052c5568a1e6d754f54b9ba2fc172e4d3 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 23 Sep 2024 08:51:56 +0000 Subject: [PATCH 119/292] added the error handling for passowrd decrypt error --- .../src/components/ChatBot/ChatInfoModal.tsx | 4 +-- .../src/components/ChatBot/ChatModeToggle.tsx | 6 ++-- frontend/src/components/UI/ErrroBoundary.tsx | 32 +++++++++++++------ frontend/src/utils/Constants.ts | 23 ++++++------- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 53172256a..74823520a 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -176,7 +176,7 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode != chatModeLables.graph ? Sources used : <>} - {mode != chatModeLables.graph ? 
Chunks : <>} + {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || mode === chatModeLables.graph || mode === chatModeLables.graph_vector_fulltext || @@ -190,7 +190,7 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === chatModeLables.entity_vector? Communities : <>} + {mode === chatModeLables.entity_vector ? Communities : <>} )} diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 6c9bf459f..187c37178 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => { }, + closeHandler = () => {}, open, anchorPortal = true, disableBackdrop = false, @@ -38,7 +38,9 @@ export default function ChatModeToggle({ }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { - const isDisabled = Boolean(selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector)); + const isDisabled = Boolean( + selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + ); const handleModeChange = () => { if (isDisabled) { setchatMode(chatModeLables.graph_vector); diff --git a/frontend/src/components/UI/ErrroBoundary.tsx b/frontend/src/components/UI/ErrroBoundary.tsx index b76a27695..b96337477 100644 --- a/frontend/src/components/UI/ErrroBoundary.tsx +++ b/frontend/src/components/UI/ErrroBoundary.tsx @@ -2,14 +2,14 @@ import React from 'react'; import { Banner } from '@neo4j-ndl/react'; export default class ErrorBoundary extends React.Component { - state = { hasError: false, errorMessage: '' }; + state = { hasError: false, errorMessage: '', errorName: '' }; static 
getDerivedStateFromError(_error: unknown) { return { hasError: true }; } componentDidCatch(error: Error, errorInfo: any) { - this.setState({ ...this.state, errorMessage: error.message }); + this.setState({ ...this.state, errorMessage: error.message, errorName: error.name }); console.log({ error }); console.log({ errorInfo }); } @@ -24,18 +24,32 @@ export default class ErrorBoundary extends React.Component { description={ this.state.errorMessage === 'Missing required parameter client_id.' ? 'Please Provide The Google Client ID For GCS Source' + : this.state.errorName === 'InvalidCharacterError' + ? 'Please Clear the Local Storage' : 'Sorry there was a problem loading this page' } title='Something went wrong' floating className='mt-8' - actions={[ - { - label: 'Documentation', - href: 'https://github.com/neo4j-labs/llm-graph-builder', - target: '_blank', - }, - ]} + actions={ + this.state.errorName === 'InvalidCharacterError' + ? [ + { + label: 'Clear local storage', + onClick: () => { + localStorage.clear(); + window.location.reload(); + }, + }, + ] + : [ + { + label: 'Documentation', + href: 'https://github.com/neo4j-labs/llm-graph-builder', + target: '_blank', + }, + ] + } >
    ); diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 0b2f60cba..e249d94d4 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -67,14 +67,14 @@ export const defaultLLM = llms?.includes('openai-gpt-4o') export const chatModeLables = { vector: 'vector', - graph : 'graph', + graph: 'graph', graph_vector: 'graph+vector', fulltext: 'fulltext', graph_vector_fulltext: 'graph+vector+fulltext', - entity_vector:'entity search+vector', + entity_vector: 'entity search+vector', unavailableChatMode: 'Chat mode is unavailable when rows are selected', - selected:'Selected' -} + selected: 'Selected', +}; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ @@ -88,28 +88,23 @@ export const chatModes = }, { mode: chatModeLables.graph, - description: - 'Translates text to Cypher queries for precise data retrieval from a graph database.' + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', }, { mode: chatModeLables.graph_vector, - description: - 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', }, { mode: chatModeLables.fulltext, - description: - 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', }, { mode: chatModeLables.graph_vector_fulltext, - description: - 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', }, { mode: chatModeLables.entity_vector, - description: - 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + description: 'Uses vector indexing on 
entity nodes for highly relevant entity-based search.', }, ]; From 9d89cb2bf829c67636d1db961412f200897b8657 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 24 Sep 2024 06:47:57 +0000 Subject: [PATCH 120/292] wordings changes --- frontend/src/components/UI/ErrroBoundary.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/UI/ErrroBoundary.tsx b/frontend/src/components/UI/ErrroBoundary.tsx index b96337477..4d296b488 100644 --- a/frontend/src/components/UI/ErrroBoundary.tsx +++ b/frontend/src/components/UI/ErrroBoundary.tsx @@ -25,7 +25,7 @@ export default class ErrorBoundary extends React.Component { this.state.errorMessage === 'Missing required parameter client_id.' ? 'Please Provide The Google Client ID For GCS Source' : this.state.errorName === 'InvalidCharacterError' - ? 'Please Clear the Local Storage' + ? "We've updated our security measures. To ensure smooth access, please clear your local storage" : 'Sorry there was a problem loading this page' } title='Something went wrong' @@ -35,7 +35,7 @@ export default class ErrorBoundary extends React.Component { this.state.errorName === 'InvalidCharacterError' ? 
[ { - label: 'Clear local storage', + label: 'Clear Storage', onClick: () => { localStorage.clear(); window.location.reload(); From 69109f9fdf22120b8e96e38694c41da1ed1c84a2 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:47:13 +0000 Subject: [PATCH 121/292] Exclude default labels from get_labels_and_relationtypes --- backend/src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index 4f2dc23d2..07773d3d1 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -590,11 +590,11 @@ def get_labels_and_relationtypes(graph): query = """ RETURN collect { CALL db.labels() yield label - WHERE NOT label IN ['Chunk','_Bloom_Perspective_'] + WHERE NOT label IN ['Chunk','_Bloom_Perspective_', '__Community__', '__Entity__'] return label order by label limit 100 } as labels, collect { CALL db.relationshipTypes() yield relationshipType as type - WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_'] + WHERE NOT type IN ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK'] return type order by type LIMIT 100 } as relationshipTypes """ graphDb_data_Access = graphDBdataAccess(graph) From d41a9209113e8187deb06c5f821da686da05bdaf Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:13:19 +0530 Subject: [PATCH 122/292] Post-Processing-Alerts (#758) * added the alerts before and after the post processing * Tooltip changes --- frontend/src/components/ChatBot/Chatbot.tsx | 17 ++++++++++++++--- frontend/src/components/Content.tsx | 6 +++++- .../Popups/Settings/SchemaFromText.tsx | 2 +- frontend/src/utils/Constants.ts | 14 +++++++------- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 412f485ed..25cb944d3 
100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -19,6 +19,8 @@ import { buttonCaptions, chatModeLables, tooltips } from '../../utils/Constants' import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; import FallBackDialog from '../UI/FallBackDialog'; +import { capitalizeWithPlus } from '../../utils/Utils'; +import { capitalize } from '@mui/material'; const InfoModal = lazy(() => import('./ChatInfoModal')); const Chatbot: FC = (props) => { @@ -44,7 +46,7 @@ const Chatbot: FC = (props) => { const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); const [copyMessageId, setCopyMessageId] = useState(null); - const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector); + const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); @@ -179,7 +181,7 @@ const Chatbot: FC = (props) => { let error; let entitiysearchonly_entities; const datetime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; - const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime }; + const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime, mode: chatMode }; setListMessages([...listMessages, userMessage]); try { setInputMessage(''); @@ -325,6 +327,15 @@ const Chatbot: FC = (props) => { className={`p-4 self-start ${isFullScreen ? 'max-w-[55%]' : ''} ${ chat.user === 'chatbot' ? 'n-bg-palette-neutral-bg-strong' : 'n-bg-palette-primary-bg-weak' } `} + subheader={ + chat.user !== 'chatbot' && chat.mode?.length ? ( + + Chat Mode: {chat.mode.includes('+') ? capitalizeWithPlus(chat.mode) : capitalize(chat.mode)} + + ) : ( + '' + ) + } >
    = (props) => { = ({ } if (processedCount === 1 && queue.isEmpty()) { (async () => { + showNormalToast('Some Q&A functionality will only be available afterwards.'); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + showSuccessToast('All Q&A functionality is available now.'); })(); } }, [processedCount, userCredentials, queue]); @@ -374,7 +376,9 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { + showNormalToast('Some Q&A functionality will only be available afterwards.'); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + showSuccessToast('All Q&A functionality is available now.'); } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; @@ -787,7 +791,7 @@ const Content: React.FC = ({
    Date: Wed, 25 Sep 2024 12:36:26 +0000 Subject: [PATCH 123/292] added write access check --- backend/score.py | 6 +++-- backend/src/QA_integration.py | 10 ++++----- backend/src/graphDB_dataAccess.py | 37 ++++++++++++++++++++++++++----- backend/src/main.py | 4 ++-- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/backend/score.py b/backend/score.py index e47905363..a17e93772 100644 --- a/backend/score.py +++ b/backend/score.py @@ -296,7 +296,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), graph = Neo4jGraph( url=uri,username=userName,password=password,database=database,sanitize = True, refresh_schema=True) else: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode) + graph_DB_dataAccess = graphDBdataAccess(graph) + write_access = graph_DB_dataAccess.check_account_access(database=database) + result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode,write_access) total_call_time = time.time() - qa_rag_start_time logging.info(f"Total Response time is {total_call_time:.2f} seconds") @@ -382,7 +384,7 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) + result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) diff --git 
a/backend/src/QA_integration.py b/backend/src/QA_integration.py index eb6ac5be1..3849c8e29 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -34,7 +34,7 @@ from src.llm import get_llm from src.shared.common_fn import load_embedding_model from src.shared.constants import * - +from src.graphDB_dataAccess import graphDBdataAccess load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') @@ -560,13 +560,13 @@ def process_graph_response(model, graph, question, messages, history): "user": "chatbot" } -def create_neo4j_chat_message_history(graph, session_id, local=False): +def create_neo4j_chat_message_history(graph, session_id, write_access=True): """ Creates and returns a Neo4jChatMessageHistory instance. """ try: - if not local: + if write_access: history = Neo4jChatMessageHistory( graph=graph, session_id=session_id @@ -594,10 +594,10 @@ def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): return chat_mode_settings -def QA_RAG(graph, model, question, document_names, session_id, mode): +def QA_RAG(graph,model, question, document_names, session_id, mode, write_access=True): logging.info(f"Chat Mode: {mode}") - history = create_neo4j_chat_message_history(graph, session_id) + history = create_neo4j_chat_message_history(graph, session_id, write_access) messages = history.messages user_question = HumanMessage(content=question) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 279451bdf..406063074 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -141,6 +141,32 @@ def update_KNN_graph(self): else: logging.info("Vector index does not exist, So KNN graph not update") + def check_account_access(self, database): + query = """ + SHOW USER PRIVILEGES + YIELD * + WHERE graph = $database AND action IN ['read', 'write'] + RETURN COUNT(*) AS readAccessCount + """ + try: + logging.info(f"Checking access for database: {database}") + + result = 
self.graph.query(query, params={"database": database}) + read_access_count = result[0]["readAccessCount"] if result else 0 + + logging.info(f"Read access count: {read_access_count}") + + if read_access_count > 0: + logging.info("The account has read access.") + return False + else: + logging.info("The account has write access.") + return True + + except Exception as e: + logging.error(f"Error checking account access: {e}") + return False + def check_gds_version(self): try: gds_procedure_count = """ @@ -162,7 +188,7 @@ def check_gds_version(self): logging.error(f"An error occurred while checking GDS version: {e}") return False - def connection_check_and_get_vector_dimensions(self): + def connection_check_and_get_vector_dimensions(self,database): """ Get the vector index dimension from database and application configuration and DB connection status @@ -189,18 +215,19 @@ def connection_check_and_get_vector_dimensions(self): logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}') gds_status = self.check_gds_version() + write_access = self.check_account_access(database=database) if self.graph: if len(db_vector_dimension) > 0: - return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status} + return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status,"write_access":write_access} else: if len(db_vector_dimension) == 0 and len(result_chunks) == 0: logging.info("Chunks and vector index does not exists in database") - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False,"gds_status":gds_status} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection 
Successful","chunks_exists":False,"gds_status":gds_status,"write_access":write_access} elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0: - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True,"gds_status":gds_status} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True,"gds_status":gds_status,"write_access":write_access} else: - return {'message':"Connection Successful","gds_status":gds_status} + return {'message':"Connection Successful","gds_status": gds_status,"write_access":write_access} def execute_query(self, query, param=None): return self.graph.query(query, param) diff --git a/backend/src/main.py b/backend/src/main.py index 588df22ed..c452be8f8 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -502,7 +502,7 @@ def update_graph(graph): graph_DB_dataAccess.update_KNN_graph() -def connection_check_and_get_vector_dimensions(graph): +def connection_check_and_get_vector_dimensions(graph,database): """ Args: uri: URI of the graph to extract @@ -513,7 +513,7 @@ def connection_check_and_get_vector_dimensions(graph): Returns a status of connection from NEO4j is success or failure """ graph_DB_dataAccess = graphDBdataAccess(graph) - return graph_DB_dataAccess.connection_check_and_get_vector_dimensions() + return graph_DB_dataAccess.connection_check_and_get_vector_dimensions(database) def merge_chunks_local(file_name, total_chunks, chunk_dir, merged_dir): From 91a4d30ddcb89042f5a54a883d56bfa2a6746bb7 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:37:46 +0000 Subject: [PATCH 124/292] added write access param --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 
a17e93772..b8573fb2f 100644 --- a/backend/score.py +++ b/backend/score.py @@ -298,7 +298,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), graph = create_graph_database_connection(uri, userName, password, database) graph_DB_dataAccess = graphDBdataAccess(graph) write_access = graph_DB_dataAccess.check_account_access(database=database) - result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode,write_access) + result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode,write_access=write_access) total_call_time = time.time() - qa_rag_start_time logging.info(f"Total Response time is {total_call_time:.2f} seconds") From 2ab06d6eaeba6a37ec048a8364ea329519c6ebe8 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 25 Sep 2024 14:30:06 +0000 Subject: [PATCH 125/292] added fulltext creation --- backend/score.py | 7 +++-- backend/src/post_processing.py | 55 ++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/backend/score.py b/backend/score.py index b8573fb2f..b5f0ea23d 100644 --- a/backend/score.py +++ b/backend/score.py @@ -14,7 +14,7 @@ from src.graphDB_dataAccess import graphDBdataAccess from src.graph_query import get_graph_results from src.chunkid_entities import get_entities_from_chunkids -from src.post_processing import create_fulltext, create_entity_embedding +from src.post_processing import create_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import create_communities import json @@ -261,14 +261,15 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database logging.info(f'Updated KNN Graph') if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: - await 
asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") - # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") + await asyncio.to_thread(create_fulltext_indexes, uri=uri, username=userName, password=password, database=database) json_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Full Text index created') + if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') + if "create_communities" in tasks: model = "openai-gpt-4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 7d038c61b..88d99385a 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -10,30 +10,26 @@ FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];" FILTER_LABELS = ["Chunk","Document","__Community__"] - HYBRID_SEARCH_INDEX_DROP_QUERY = "DROP INDEX keyword IF EXISTS;" -HYBRID_SEARCH_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX keyword FOR (n:Chunk) ON EACH [n.text]" +HYBRID_SEARCH_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX keyword FOR (n:Chunk) ON EACH [n.text]" -def create_fulltext(uri, username, password, database,type): - start_time = time.time() - logging.info("Starting the process of creating a full-text index.") +COMMUNITY_INDEX_DROP_QUERY = "DROP INDEX community_keyword IF EXISTS;" +COMMUNITY_INDEX_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX community_keyword FOR 
(n:`__Community__`) ON EACH [n.summary]" - try: - driver = GraphDatabase.driver(uri, auth=(username, password), database=database) - driver.verify_connectivity() - logging.info("Database connectivity verified.") - except Exception as e: - logging.error(f"Failed to create a database driver or verify connectivity: {e}") - return +def create_fulltext(driver,type): + + start_time = time.time() try: with driver.session() as session: try: start_step = time.time() if type == "entities": drop_query = DROP_INDEX_QUERY - else: + elif type == "hybrid": drop_query = HYBRID_SEARCH_INDEX_DROP_QUERY + else: + drop_query = COMMUNITY_INDEX_DROP_QUERY session.run(drop_query) logging.info(f"Dropped existing index (if any) in {time.time() - start_step:.2f} seconds.") except Exception as e: @@ -58,8 +54,11 @@ def create_fulltext(uri, username, password, database,type): start_step = time.time() if type == "entities": fulltext_query = FULL_TEXT_QUERY.format(labels_str=labels_str) - else: + elif type == "hybrid": fulltext_query = HYBRID_SEARCH_FULL_TEXT_QUERY + else: + fulltext_query = COMMUNITY_INDEX_FULL_TEXT_QUERY + session.run(fulltext_query) logging.info(f"Created full-text index in {time.time() - start_step:.2f} seconds.") except Exception as e: @@ -68,10 +67,34 @@ def create_fulltext(uri, username, password, database,type): except Exception as e: logging.error(f"An error occurred during the session: {e}") finally: - driver.close() - logging.info("Driver closed.") logging.info(f"Process completed in {time.time() - start_time:.2f} seconds.") + +def create_fulltext_indexes(uri, username, password, database): + types = ["entities", "hybrid", "community"] + logging.info("Starting the process of creating full-text indexes.") + + try: + driver = GraphDatabase.driver(uri, auth=(username, password), database=database) + driver.verify_connectivity() + logging.info("Database connectivity verified.") + except Exception as e: + logging.error(f"An unexpected error occurred: {e}") + return + + 
for index_type in types: + try: + logging.info(f"Creating a full-text index for type '{index_type}'.") + create_fulltext(driver, index_type) + logging.info(f"Full-text index for type '{index_type}' created successfully.") + except Exception as e: + logging.error(f"Failed to create full-text index for type '{index_type}': {e}") + + logging.info("Full-text indexes creation process completed.") + driver.close() + logging.info("Driver closed.") + + def create_entity_embedding(graph:Neo4jGraph): rows = fetch_entities_for_embedding(graph) for i in range(0, len(rows), 1000): From 9414f31b82882f5781eba539e459fcbc84227245 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 26 Sep 2024 07:23:07 +0000 Subject: [PATCH 126/292] disabled the write and delete actions for read only user mode --- frontend/src/components/Content.tsx | 21 +- .../src/components/Layout/DrawerDropzone.tsx | 284 ++++++++++-------- frontend/src/components/Layout/PageLayout.tsx | 3 +- frontend/src/components/Layout/SideNav.tsx | 10 +- .../ConnectionModal/ConnectionModal.tsx | 6 +- frontend/src/context/UserCredentials.tsx | 5 + frontend/src/types.ts | 2 + 7 files changed, 190 insertions(+), 141 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 517ada241..92a5e2b76 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -59,8 +59,16 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); - const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus, isGdsActive, setGdsActive } = - useCredentials(); + const { + setUserCredentials, + userCredentials, + connectionStatus, + setConnectionStatus, + isGdsActive, + setGdsActive, + setIsReadOnlyUser, + isReadOnlyUser, + } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = 
useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -118,6 +126,9 @@ const Content: React.FC = ({ if (neo4jConnection.isgdsActive !== undefined) { setGdsActive(neo4jConnection.isgdsActive); } + if (neo4jConnection.isReadOnlyUser !== undefined) { + setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); + } } else { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } @@ -756,7 +767,7 @@ const Content: React.FC = ({ />
    - Neo4j connection + Neo4j connection {isReadOnlyUser ? '(Read only Mode)' : ''} = ({ label='Graph Enhancemnet Settings' className='mr-2.5' onClick={toggleEnhancementDialog} - disabled={!connectionStatus} + disabled={!connectionStatus || isReadOnlyUser} size={isTablet ? 'small' : 'medium'} > Graph Enhancement @@ -883,7 +894,7 @@ const Content: React.FC = ({ } placement='top' onClick={() => setshowDeletePopUp(true)} - disabled={!selectedfileslength} + disabled={!selectedfileslength||isReadOnlyUser} className='ml-0.5' label='Delete Files' size={isTablet ? 'small' : 'medium'} diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index f423f62f9..07d91f642 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -11,6 +11,7 @@ import { APP_SOURCES } from '../../utils/Constants'; import GenericButton from '../WebSources/GenericSourceButton'; import GenericModal from '../WebSources/GenericSourceModal'; import FallBackDialog from '../UI/FallBackDialog'; +import { useCredentials } from '../../context/UserCredentials'; const S3Modal = lazy(() => import('../DataSources/AWS/S3Modal')); const GCSModal = lazy(() => import('../DataSources/GCS/GCSModal')); @@ -25,6 +26,7 @@ const DrawerDropzone: React.FC = ({ }) => { const [isBackendConnected, setIsBackendConnected] = useState(false); const { closeAlert, alertState } = useAlertContext(); + const { isReadOnlyUser } = useCredentials(); useEffect(() => { async function getHealthStatus() { @@ -54,143 +56,167 @@ const DrawerDropzone: React.FC = ({ return (
    - - {alertState.showAlert && ( - - )} -
    -
    -
    -
    - {process.env.VITE_ENV != 'PROD' && ( - - - {!isBackendConnected ? : } + {!isReadOnlyUser ? ( + + {alertState.showAlert && ( + + )} +
    +
    +
    +
    + {process.env.VITE_ENV != 'PROD' && ( + + + {!isBackendConnected ? : } + + Backend connection status - Backend connection status - - )} -
    - {process.env.VITE_ENV != 'PROD' ? ( - <> - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
    - -
    - )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {(APP_SOURCES.includes('youtube') || - APP_SOURCES.includes('wiki') || - APP_SOURCES.includes('web')) && ( -
    - - -
    - )} - {APP_SOURCES.includes('s3') && ( -
    - - }> - - -
    - )} - {APP_SOURCES.includes('gcs') && ( -
    - - }> + )} +
    + {process.env.VITE_ENV != 'PROD' ? ( + <> + + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
    + +
    + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {(APP_SOURCES.includes('youtube') || + APP_SOURCES.includes('wiki') || + APP_SOURCES.includes('web')) && ( +
    + + +
    + )} + {APP_SOURCES.includes('s3') && ( +
    + + }> + + +
    + )} + {APP_SOURCES.includes('gcs') && ( +
    + + }> + + +
    + )} + + ) : ( + <> + )} +
    + + ) : ( + <> + + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
    + +
    + )} + {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( +
    + + +
    + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( +
    + + }> + + +
    + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( +
    + - -
    - )} - - ) : ( - <> - )} -
    - - ) : ( - <> - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
    - -
    - )} - {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( -
    - - -
    - )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( -
    - - }> - - -
    - )} - {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( -
    - - -
    - )} - - ) : ( - <> - )} -
    - - )} +
    + )} + + ) : ( + <> + )} + + + )} +
    -
    - + + ) : ( + + + This user account does not have permission to access or manage data sources. + + + )}
    ); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index c2c21116c..07d795484 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -22,6 +22,7 @@ export default function PageLayoutNew({ openSettingsDialog: () => void; }) { const largedesktops = useMediaQuery(`(min-width:1440px )`); + const { userCredentials, connectionStatus } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); @@ -31,7 +32,7 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const { userCredentials, connectionStatus } = useCredentials(); + const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index bdc43c8be..ee3ef510b 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -44,7 +44,7 @@ const SideNav: React.FC = ({ const [chatModeAnchor, setchatModeAnchor] = useState(null); const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); - const { connectionStatus } = useCredentials(); + const { connectionStatus, isReadOnlyUser } = useCredentials(); const date = new Date(); useEffect(() => { @@ -125,7 +125,7 @@ const SideNav: React.FC = ({ /> )} - {!largedesktops && position === 'left' && ( + {!largedesktops && position === 'left' && !isReadOnlyUser && ( @@ -137,7 +137,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && 
APP_SOURCES.includes('gcs') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('gcs') && position === 'left' && !isReadOnlyUser && ( @@ -149,7 +149,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && !isReadOnlyUser && ( @@ -161,7 +161,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && !isReadOnlyUser && ( diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 0213e9502..618cc7b80 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -41,7 +41,7 @@ export default function ConnectionModal({ const [username, setUsername] = useState(initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials, setGdsActive } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); @@ -208,7 +208,10 @@ export default function ConnectionModal({ throw new Error(response.data.error); } else { const isgdsActive = response.data.data.gds_status; + const isReadOnlyUser = !response.data.data.write_access; setGdsActive(isgdsActive); + console.log({ isReadOnlyUser }); + setIsReadOnlyUser(isReadOnlyUser); localStorage.setItem( 'neo4j.connection', JSON.stringify({ @@ -218,6 +221,7 @@ export default function ConnectionModal({ database: database, userDbVectorIndex, isgdsActive, + isReadOnlyUser, }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index 1b5eda2da..cd3db2d62 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -12,6 +12,8 @@ export const UserConnection = createContext({ setGdsActive: () => false, connectionStatus: false, setConnectionStatus: () => null, + isReadOnlyUser: false, + setIsReadOnlyUser: () => null, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -20,6 +22,7 @@ export const useCredentials = () => { const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); const [isGdsActive, setGdsActive] = useState(false); + const [isReadOnlyUser, setIsReadOnlyUser] = useState(false); const [connectionStatus, setConnectionStatus] = useReducer((state) => !state, false); const value = { userCredentials, @@ -28,6 +31,8 @@ const UserCredentialsWrapper: FunctionComponent = (props) => { setGdsActive, connectionStatus, setConnectionStatus, + isReadOnlyUser, + setIsReadOnlyUser, }; return {props.children}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 9631335a9..10f16e4d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -631,6 +631,8 @@ export interface ContextProps { setUserCredentials: (UserCredentials: UserCredentials) => void; 
isGdsActive: boolean; setGdsActive: Dispatch>; + isReadOnlyUser: boolean; + setIsReadOnlyUser: Dispatch>; connectionStatus: boolean; setConnectionStatus: Dispatch>; } From f116300741a72f9698e7a894dcc25642c5be7844 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 26 Sep 2024 08:32:47 +0000 Subject: [PATCH 127/292] modified query --- backend/src/graphDB_dataAccess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index f4a569d8b..c2e2f6424 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -148,7 +148,7 @@ def check_account_access(self, database): query = """ SHOW USER PRIVILEGES YIELD * - WHERE graph = $database AND action IN ['read', 'write'] + WHERE graph = $database AND action IN ['read'] RETURN COUNT(*) AS readAccessCount """ try: From dbb77d6e822dc8075234db94bda212c8920e347c Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Mon, 23 Sep 2024 09:06:28 +0000 Subject: [PATCH 128/292] test updates --- backend/test_commutiesqa.py | 244 ++++++++++++++++++++++++++++++++++ backend/test_integrationqa.py | 3 +- 2 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 backend/test_commutiesqa.py diff --git a/backend/test_commutiesqa.py b/backend/test_commutiesqa.py new file mode 100644 index 000000000..771cb044c --- /dev/null +++ b/backend/test_commutiesqa.py @@ -0,0 +1,244 @@ +import json +import os +import shutil +import logging +import pandas as pd +from datetime import datetime as dt +from dotenv import load_dotenv +from score import * +from src.main import * +from src.QA_integration import QA_RAG +from langserve import add_routes +from graphdatascience import GraphDataScience +from src.entities.source_node import sourceNode + +# Load environment variables if needed +load_dotenv() +# Constants +URI = '' 
+USERNAME = '' +PASSWORD = '' +DATABASE = 'persistent1' +CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") +MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") + +# Initialize database connection +graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) + +def create_source_node_local(graph, model, file_name): + """Creates a source node for a local file.""" + source_node = sourceNode() + source_node.file_name = file_name + source_node.file_type = 'pdf' + source_node.file_size = '1087' + source_node.file_source = 'local file' + source_node.model = model + source_node.created_at = dt.now() + graphDB_data_Access = graphDBdataAccess(graph) + graphDB_data_Access.create_source_node(source_node) + return source_node + +def test_graph_from_file_local(model_name): + """Test graph creation from a local file.""" + file_name = 'About Amazon.pdf' + shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', + os.path.join(MERGED_DIR, file_name)) + + create_source_node_local(graph, model_name, file_name) + merged_file_path = os.path.join(MERGED_DIR, file_name) + + local_file_result = extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None + ) + logging.info("Local file processing complete") + print(local_file_result) + + try: + assert local_file_result['status'] == 'Completed' + assert local_file_result['nodeCount'] > 0 + assert local_file_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + + return local_file_result + +def test_graph_from_wikipedia(model_name): + # try: + """Test graph creation from a Wikipedia page.""" + wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + source_type = 'Wikipedia' + file_name = "Ram_Mandir" + create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) + + wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, 
DATABASE, model_name, file_name, 'en',file_name, '', '',None) + logging.info("Wikipedia test done") + print(wiki_result) + try: + assert wiki_result['status'] == 'Completed' + assert wiki_result['nodeCount'] > 0 + assert wiki_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + + return wiki_result + # except Exception as ex: + # print(ex) + +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + file_name = [] + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None) + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result + +def test_graph_from_youtube_video(model_name): + """Test graph creation from a YouTube video.""" + source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' + source_type = 'youtube' + create_source_node_graph_url_youtube(graph, model_name, source_url, source_type) + youtube_result = extract_graph_from_file_youtube( + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '' + ) + logging.info("YouTube Video test done") + print(youtube_result) + + try: + assert youtube_result['status'] == 'Completed' + assert youtube_result['nodeCount'] > 1 + assert youtube_result['relationshipCount'] > 1 + print("Success") + except AssertionError as e: + print("Failed: ", e) + + return youtube_result + +def test_chatbot_qna(model_name, mode='vector'): + """Test chatbot QnA functionality for different modes.""" + QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 
1, mode) + print(QA_n_RAG) + print(len(QA_n_RAG['message'])) + + + try: + assert len(QA_n_RAG['message']) > 20 + return QA_n_RAG + print("Success") + except AssertionError as e: + print("Failed ", e) + return QA_n_RAG + +#Get Test disconnected_nodes list +def disconected_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + print(nodes_list[0]["e"]["elementId"]) + status = "False" + if total_nodes['total']>0: + status = "True" + else: + status = "False" + return nodes_list[0]["e"]["elementId"], status + +#Test Delete delete_disconnected_nodes list +def delete_disconected_nodes(lst_element_id): + print(f'disconnect elementid list {lst_element_id}') + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) + print(f'delete disconnect api result {result}') + if not result: + return "True" + else: + return "False" + +#Test Get Duplicate_nodes +def get_duplicate_nodes(): + #graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() + if total_nodes['total']>0: + return "True" + else: + return "False" + +#Test populate_graph_schema +def test_populate_graph_schema_from_text(model): + result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble.', model, True) + print(result_schema) + return result_schema + +# def compare_graph_results(results): +# """ +# Compare graph results across different models. +# Add custom logic here to compare graph data, nodes, and relationships. +# """ +# # Placeholder logic for comparison +# print("Comparing results...") +# for i in range(len(results) - 1): +# result_a = results[i] +# result_b = results[i + 1] +# if result_a == result_b: +# print(f"Result {i} is identical to result {i+1}") +# else: +# print(f"Result {i} differs from result {i+1}") + +def run_tests(): + final_list = [] + error_list = [] + models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.5-pro','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_v3p1_405b','bedrock_claude_3_5_sonnet'] + + for model_name in models: + try: + # final_list.append(test_graph_from_file_local(model_name)) + # final_list.append(test_graph_from_wikipedia(model_name)) + # final_list.append(test_populate_graph_schema_from_text(model_name)) + # final_list.append(test_graph_website(model_name)) + # final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_chatbot_qna(model_name, mode='entity search+vector')) + + + except Exception as e: + error_list.append((model_name, str(e))) + # #Compare and log diffrences in graph results + # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results + # test_populate_graph_schema_from_text('openai-gpt-4o') + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delt = delete_disconected_nodes(lst_element_id) +# dup = get_duplicate_nodes() + print(final_list) + # schma = test_populate_graph_schema_from_text(model) + # Save final results to CSV + df = pd.DataFrame(final_list) + print(df) + df['execution_date'] = dt.today().strftime('%Y-%m-%d') + 
df['disconnected_nodes']=dis_status +# df['get_duplicate_nodes']=dup + df['delete_disconected_nodes']=delt + # df['test_populate_graph_schema_from_text'] = schma + df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + + # Save error details to CSV + df_errors = pd.DataFrame(error_list, columns=['Model', 'Error']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + +if __name__ == "__main__": + run_tests() \ No newline at end of file diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 7b2b110d4..48fdf68c0 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -12,7 +12,6 @@ # Load environment variables if needed load_dotenv() - # Constants URI = '' USERNAME = '' @@ -201,7 +200,7 @@ def test_populate_graph_schema_from_text(model): def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.0-pro','gemini-1.5-pro','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_v3p1_405b','bedrock_claude_3_5_sonnet'] + models = ['openai-gpt-3.5','openai-gpt-4o'] for model_name in models: try: From 6d0e18bc9a654a3f2448155f7088d94e22805689 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:35:25 +0000 Subject: [PATCH 129/292] test uupdated --- backend/test_integrationqa.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 48fdf68c0..832546995 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -200,7 +200,7 @@ def test_populate_graph_schema_from_text(model): def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5','openai-gpt-4o'] + models = 
['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.0-pro','gemini-1.5-pro','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_v3p1_405b','bedrock_claude_3_5_sonnet'] for model_name in models: try: @@ -208,10 +208,14 @@ def run_tests(): final_list.append(test_graph_from_wikipedia(model_name)) final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_website(model_name)) - final_list.append(test_graph_from_youtube_video(model_name)) + # final_list.append(test_graph_from_youtube_video(model_name)) final_list.append(test_chatbot_qna(model_name)) final_list.append(test_chatbot_qna(model_name, mode='vector')) + final_list.append(test_chatbot_qna(model_name, mode='graph+vector')) + final_list.append(test_chatbot_qna(model_name, mode='fulltext')) final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext')) + final_list.append(test_chatbot_qna(model_name, mode='entity search+vector')) + except Exception as e: error_list.append((model_name, str(e))) # #Compare and log diffrences in graph results From db02bfa98fb4012404dc5547adfc3eb406d74c86 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 26 Sep 2024 19:01:25 +0530 Subject: [PATCH 130/292] Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> --- backend/score.py | 13 +- backend/src/QA_integration.py | 59 +++- backend/src/graphDB_dataAccess.py | 37 ++- backend/src/main.py | 4 +- backend/src/post_processing.py | 55 +++- frontend/src/components/Content.tsx | 21 +- .../src/components/Layout/DrawerDropzone.tsx | 284 ++++++++++-------- 
frontend/src/components/Layout/PageLayout.tsx | 3 +- frontend/src/components/Layout/SideNav.tsx | 10 +- .../ConnectionModal/ConnectionModal.tsx | 6 +- frontend/src/context/UserCredentials.tsx | 5 + frontend/src/types.ts | 2 + 12 files changed, 315 insertions(+), 184 deletions(-) diff --git a/backend/score.py b/backend/score.py index 63a26ec5b..92a573aeb 100644 --- a/backend/score.py +++ b/backend/score.py @@ -14,7 +14,7 @@ from src.graphDB_dataAccess import graphDBdataAccess from src.graph_query import get_graph_results from src.chunkid_entities import get_entities_from_chunkids -from src.post_processing import create_fulltext, create_entity_embedding +from src.post_processing import create_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import create_communities import json @@ -263,14 +263,15 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database logging.info(f'Updated KNN Graph') if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: - await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities") - # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword") + await asyncio.to_thread(create_fulltext_indexes, uri=uri, username=userName, password=password, database=database) json_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Full Text index created') + if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') + if 
"create_communities" in tasks: model = "openai-gpt-4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) @@ -298,7 +299,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), graph = Neo4jGraph( url=uri,username=userName,password=password,database=database,sanitize = True, refresh_schema=True) else: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode) + graph_DB_dataAccess = graphDBdataAccess(graph) + write_access = graph_DB_dataAccess.check_account_access(database=database) + result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode,write_access=write_access) total_call_time = time.time() - qa_rag_start_time logging.info(f"Total Response time is {total_call_time:.2f} seconds") @@ -384,7 +387,7 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph) + result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 651440437..868508815 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -22,6 +22,7 @@ from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage from 
langchain.chains import GraphCypherQAChain +from langchain_community.chat_message_histories import ChatMessageHistory # LangChain chat models from langchain_openai import ChatOpenAI, AzureChatOpenAI @@ -36,11 +37,33 @@ from src.llm import get_llm from src.shared.common_fn import load_embedding_model from src.shared.constants import * - +from src.graphDB_dataAccess import graphDBdataAccess load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') -EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) +EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) + + + +class SessionChatHistory: + history_dict = {} + + @classmethod + def get_chat_history(cls, session_id): + """Retrieve or create chat message history for a given session ID.""" + if session_id not in cls.history_dict: + logging.info(f"Creating new ChatMessageHistory Local for session ID: {session_id}") + cls.history_dict[session_id] = ChatMessageHistory() + else: + logging.info(f"Retrieved existing ChatMessageHistory Local for session ID: {session_id}") + return cls.history_dict[session_id] + +def get_history_by_session_id(session_id): + try: + return SessionChatHistory.get_chat_history(session_id) + except Exception as e: + logging.error(f"Failed to get history for session ID '{session_id}': {e}") + raise def get_total_tokens(ai_response, llm): try: @@ -71,12 +94,15 @@ def get_total_tokens(ai_response, llm): return total_tokens -def clear_chat_history(graph, session_id): +def clear_chat_history(graph, session_id,local=False): try: - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) + if not local: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + else: + history = get_history_by_session_id(session_id) history.clear() @@ -546,17 +572,20 @@ def process_graph_response(model, graph, question, messages, history): "user": "chatbot" } -def create_neo4j_chat_message_history(graph, session_id): +def 
create_neo4j_chat_message_history(graph, session_id, write_access=True): """ Creates and returns a Neo4jChatMessageHistory instance. """ try: - - history = Neo4jChatMessageHistory( - graph=graph, - session_id=session_id - ) + if write_access: + history = Neo4jChatMessageHistory( + graph=graph, + session_id=session_id + ) + return history + + history = get_history_by_session_id(session_id) return history except Exception as e: @@ -577,10 +606,10 @@ def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): return chat_mode_settings -def QA_RAG(graph, model, question, document_names, session_id, mode): +def QA_RAG(graph,model, question, document_names, session_id, mode, write_access=True): logging.info(f"Chat Mode: {mode}") - history = create_neo4j_chat_message_history(graph, session_id) + history = create_neo4j_chat_message_history(graph, session_id, write_access) messages = history.messages user_question = HumanMessage(content=question) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 643902b16..c2e2f6424 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -144,6 +144,32 @@ def update_KNN_graph(self): else: logging.info("Vector index does not exist, So KNN graph not update") + def check_account_access(self, database): + query = """ + SHOW USER PRIVILEGES + YIELD * + WHERE graph = $database AND action IN ['read'] + RETURN COUNT(*) AS readAccessCount + """ + try: + logging.info(f"Checking access for database: {database}") + + result = self.graph.query(query, params={"database": database}) + read_access_count = result[0]["readAccessCount"] if result else 0 + + logging.info(f"Read access count: {read_access_count}") + + if read_access_count > 0: + logging.info("The account has read access.") + return False + else: + logging.info("The account has write access.") + return True + + except Exception as e: + logging.error(f"Error checking account access: {e}") + return False + def 
check_gds_version(self): try: gds_procedure_count = """ @@ -168,7 +194,7 @@ def check_gds_version(self): logging.error(f"An error occurred while checking GDS version: {e}") return False - def connection_check_and_get_vector_dimensions(self): + def connection_check_and_get_vector_dimensions(self,database): """ Get the vector index dimension from database and application configuration and DB connection status @@ -195,18 +221,19 @@ def connection_check_and_get_vector_dimensions(self): logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}') gds_status = self.check_gds_version() + write_access = self.check_account_access(database=database) if self.graph: if len(db_vector_dimension) > 0: - return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status} + return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful","gds_status":gds_status,"write_access":write_access} else: if len(db_vector_dimension) == 0 and len(result_chunks) == 0: logging.info("Chunks and vector index does not exists in database") - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False,"gds_status":gds_status} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False,"gds_status":gds_status,"write_access":write_access} elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0: - return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True,"gds_status":gds_status} + return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection 
Successful","chunks_exists":True,"gds_status":gds_status,"write_access":write_access} else: - return {'message':"Connection Successful","gds_status":gds_status} + return {'message':"Connection Successful","gds_status": gds_status,"write_access":write_access} def execute_query(self, query, param=None): return self.graph.query(query, param) diff --git a/backend/src/main.py b/backend/src/main.py index 07773d3d1..9e142a3cb 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -513,7 +513,7 @@ def update_graph(graph): graph_DB_dataAccess.update_KNN_graph() -def connection_check_and_get_vector_dimensions(graph): +def connection_check_and_get_vector_dimensions(graph,database): """ Args: uri: URI of the graph to extract @@ -524,7 +524,7 @@ def connection_check_and_get_vector_dimensions(graph): Returns a status of connection from NEO4j is success or failure """ graph_DB_dataAccess = graphDBdataAccess(graph) - return graph_DB_dataAccess.connection_check_and_get_vector_dimensions() + return graph_DB_dataAccess.connection_check_and_get_vector_dimensions(database) def merge_chunks_local(file_name, total_chunks, chunk_dir, merged_dir): diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 7d038c61b..88d99385a 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -10,30 +10,26 @@ FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];" FILTER_LABELS = ["Chunk","Document","__Community__"] - HYBRID_SEARCH_INDEX_DROP_QUERY = "DROP INDEX keyword IF EXISTS;" -HYBRID_SEARCH_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX keyword FOR (n:Chunk) ON EACH [n.text]" +HYBRID_SEARCH_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX keyword FOR (n:Chunk) ON EACH [n.text]" -def create_fulltext(uri, username, password, database,type): - start_time = time.time() - logging.info("Starting the process of creating a full-text index.") +COMMUNITY_INDEX_DROP_QUERY = "DROP INDEX community_keyword IF EXISTS;" 
+COMMUNITY_INDEX_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX community_keyword FOR (n:`__Community__`) ON EACH [n.summary]" - try: - driver = GraphDatabase.driver(uri, auth=(username, password), database=database) - driver.verify_connectivity() - logging.info("Database connectivity verified.") - except Exception as e: - logging.error(f"Failed to create a database driver or verify connectivity: {e}") - return +def create_fulltext(driver,type): + + start_time = time.time() try: with driver.session() as session: try: start_step = time.time() if type == "entities": drop_query = DROP_INDEX_QUERY - else: + elif type == "hybrid": drop_query = HYBRID_SEARCH_INDEX_DROP_QUERY + else: + drop_query = COMMUNITY_INDEX_DROP_QUERY session.run(drop_query) logging.info(f"Dropped existing index (if any) in {time.time() - start_step:.2f} seconds.") except Exception as e: @@ -58,8 +54,11 @@ def create_fulltext(uri, username, password, database,type): start_step = time.time() if type == "entities": fulltext_query = FULL_TEXT_QUERY.format(labels_str=labels_str) - else: + elif type == "hybrid": fulltext_query = HYBRID_SEARCH_FULL_TEXT_QUERY + else: + fulltext_query = COMMUNITY_INDEX_FULL_TEXT_QUERY + session.run(fulltext_query) logging.info(f"Created full-text index in {time.time() - start_step:.2f} seconds.") except Exception as e: @@ -68,10 +67,34 @@ def create_fulltext(uri, username, password, database,type): except Exception as e: logging.error(f"An error occurred during the session: {e}") finally: - driver.close() - logging.info("Driver closed.") logging.info(f"Process completed in {time.time() - start_time:.2f} seconds.") + +def create_fulltext_indexes(uri, username, password, database): + types = ["entities", "hybrid", "community"] + logging.info("Starting the process of creating full-text indexes.") + + try: + driver = GraphDatabase.driver(uri, auth=(username, password), database=database) + driver.verify_connectivity() + logging.info("Database connectivity verified.") + except 
Exception as e: + logging.error(f"An unexpected error occurred: {e}") + return + + for index_type in types: + try: + logging.info(f"Creating a full-text index for type '{index_type}'.") + create_fulltext(driver, index_type) + logging.info(f"Full-text index for type '{index_type}' created successfully.") + except Exception as e: + logging.error(f"Failed to create full-text index for type '{index_type}': {e}") + + logging.info("Full-text indexes creation process completed.") + driver.close() + logging.info("Driver closed.") + + def create_entity_embedding(graph:Neo4jGraph): rows = fetch_entities_for_embedding(graph) for i in range(0, len(rows), 1000): diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index b3efd404c..eeaa56cb6 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -59,8 +59,16 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); - const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus, isGdsActive, setGdsActive } = - useCredentials(); + const { + setUserCredentials, + userCredentials, + connectionStatus, + setConnectionStatus, + isGdsActive, + setGdsActive, + setIsReadOnlyUser, + isReadOnlyUser, + } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -118,6 +126,9 @@ const Content: React.FC = ({ if (neo4jConnection.isgdsActive !== undefined) { setGdsActive(neo4jConnection.isgdsActive); } + if (neo4jConnection.isReadOnlyUser !== undefined) { + setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); + } } else { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } @@ -760,7 +771,7 @@ const Content: React.FC = ({ />
    - Neo4j connection + Neo4j connection {isReadOnlyUser ? '(Read only Mode)' : ''} = ({ label='Graph Enhancemnet Settings' className='mr-2.5' onClick={toggleEnhancementDialog} - disabled={!connectionStatus} + disabled={!connectionStatus || isReadOnlyUser} size={isTablet ? 'small' : 'medium'} > Graph Enhancement @@ -887,7 +898,7 @@ const Content: React.FC = ({ } placement='top' onClick={() => setshowDeletePopUp(true)} - disabled={!selectedfileslength} + disabled={!selectedfileslength||isReadOnlyUser} className='ml-0.5' label='Delete Files' size={isTablet ? 'small' : 'medium'} diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index f423f62f9..07d91f642 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -11,6 +11,7 @@ import { APP_SOURCES } from '../../utils/Constants'; import GenericButton from '../WebSources/GenericSourceButton'; import GenericModal from '../WebSources/GenericSourceModal'; import FallBackDialog from '../UI/FallBackDialog'; +import { useCredentials } from '../../context/UserCredentials'; const S3Modal = lazy(() => import('../DataSources/AWS/S3Modal')); const GCSModal = lazy(() => import('../DataSources/GCS/GCSModal')); @@ -25,6 +26,7 @@ const DrawerDropzone: React.FC = ({ }) => { const [isBackendConnected, setIsBackendConnected] = useState(false); const { closeAlert, alertState } = useAlertContext(); + const { isReadOnlyUser } = useCredentials(); useEffect(() => { async function getHealthStatus() { @@ -54,143 +56,167 @@ const DrawerDropzone: React.FC = ({ return (
    - - {alertState.showAlert && ( - - )} -
    -
    -
    -
    - {process.env.VITE_ENV != 'PROD' && ( - - - {!isBackendConnected ? : } + {!isReadOnlyUser ? ( + + {alertState.showAlert && ( + + )} +
    +
    +
    +
    + {process.env.VITE_ENV != 'PROD' && ( + + + {!isBackendConnected ? : } + + Backend connection status - Backend connection status - - )} -
    - {process.env.VITE_ENV != 'PROD' ? ( - <> - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
    - -
    - )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {(APP_SOURCES.includes('youtube') || - APP_SOURCES.includes('wiki') || - APP_SOURCES.includes('web')) && ( -
    - - -
    - )} - {APP_SOURCES.includes('s3') && ( -
    - - }> - - -
    - )} - {APP_SOURCES.includes('gcs') && ( -
    - - }> + )} +
    + {process.env.VITE_ENV != 'PROD' ? ( + <> + + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
    + +
    + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {(APP_SOURCES.includes('youtube') || + APP_SOURCES.includes('wiki') || + APP_SOURCES.includes('web')) && ( +
    + + +
    + )} + {APP_SOURCES.includes('s3') && ( +
    + + }> + + +
    + )} + {APP_SOURCES.includes('gcs') && ( +
    + + }> + + +
    + )} + + ) : ( + <> + )} +
    + + ) : ( + <> + + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
    + +
    + )} + {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( +
    + + +
    + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( +
    + + }> + + +
    + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( +
    + - -
    - )} - - ) : ( - <> - )} -
    - - ) : ( - <> - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
    - -
    - )} - {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( -
    - - -
    - )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( -
    - - }> - - -
    - )} - {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( -
    - - -
    - )} - - ) : ( - <> - )} -
    - - )} +
    + )} + + ) : ( + <> + )} + + + )} +
    -
    - + + ) : ( + + + This user account does not have permission to access or manage data sources. + + + )}
    ); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index c2c21116c..07d795484 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -22,6 +22,7 @@ export default function PageLayoutNew({ openSettingsDialog: () => void; }) { const largedesktops = useMediaQuery(`(min-width:1440px )`); + const { userCredentials, connectionStatus } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); @@ -31,7 +32,7 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const { userCredentials, connectionStatus } = useCredentials(); + const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index bdc43c8be..ee3ef510b 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -44,7 +44,7 @@ const SideNav: React.FC = ({ const [chatModeAnchor, setchatModeAnchor] = useState(null); const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); - const { connectionStatus } = useCredentials(); + const { connectionStatus, isReadOnlyUser } = useCredentials(); const date = new Date(); useEffect(() => { @@ -125,7 +125,7 @@ const SideNav: React.FC = ({ /> )} - {!largedesktops && position === 'left' && ( + {!largedesktops && position === 'left' && !isReadOnlyUser && ( @@ -137,7 +137,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && 
APP_SOURCES.includes('gcs') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('gcs') && position === 'left' && !isReadOnlyUser && ( @@ -149,7 +149,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && !isReadOnlyUser && ( @@ -161,7 +161,7 @@ const SideNav: React.FC = ({ } /> )} - {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && ( + {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && !isReadOnlyUser && ( diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 0213e9502..618cc7b80 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -41,7 +41,7 @@ export default function ConnectionModal({ const [username, setUsername] = useState(initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials, setGdsActive } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); @@ -208,7 +208,10 @@ export default function ConnectionModal({ throw new Error(response.data.error); } else { const isgdsActive = response.data.data.gds_status; + const isReadOnlyUser = !response.data.data.write_access; setGdsActive(isgdsActive); + console.log({ isReadOnlyUser }); + setIsReadOnlyUser(isReadOnlyUser); localStorage.setItem( 'neo4j.connection', JSON.stringify({ @@ -218,6 +221,7 @@ export default function ConnectionModal({ database: database, userDbVectorIndex, isgdsActive, + isReadOnlyUser, }) ); setUserDbVectorIndex(response.data.data.db_vector_dimension); diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index 1b5eda2da..cd3db2d62 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -12,6 +12,8 @@ export const UserConnection = createContext({ setGdsActive: () => false, connectionStatus: false, setConnectionStatus: () => null, + isReadOnlyUser: false, + setIsReadOnlyUser: () => null, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -20,6 +22,7 @@ export const useCredentials = () => { const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); const [isGdsActive, setGdsActive] = useState(false); + const [isReadOnlyUser, setIsReadOnlyUser] = useState(false); const [connectionStatus, setConnectionStatus] = useReducer((state) => !state, false); const value = { userCredentials, @@ -28,6 +31,8 @@ const UserCredentialsWrapper: FunctionComponent = (props) => { setGdsActive, connectionStatus, setConnectionStatus, + isReadOnlyUser, + setIsReadOnlyUser, }; return {props.children}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 9631335a9..10f16e4d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -631,6 +631,8 @@ export interface ContextProps { setUserCredentials: (UserCredentials: UserCredentials) => void; 
isGdsActive: boolean; setGdsActive: Dispatch>; + isReadOnlyUser: boolean; + setIsReadOnlyUser: Dispatch>; connectionStatus: boolean; setConnectionStatus: Dispatch>; } From c7460de4f39e999716607f84d2ba24cc1f2ec583 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 26 Sep 2024 13:34:37 +0000 Subject: [PATCH 131/292] storing the gds status and write access on refresh --- frontend/src/components/Content.tsx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index eeaa56cb6..e88ff72a3 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -200,6 +200,12 @@ const Content: React.FC = ({ password: btoa(atob(parsedData.password)), }) ); + if (response.data.data.gds_status !== undefined) { + setGdsActive(response.data.data.gds_status); + } + if (response.data.data.write_access !== undefined) { + setIsReadOnlyUser(response.data.data.write_access); + } if ( (response.data.data.application_dimension === response.data.data.db_vector_dimension || response.data.data.db_vector_dimension == 0) && @@ -898,7 +904,7 @@ const Content: React.FC = ({ } placement='top' onClick={() => setshowDeletePopUp(true)} - disabled={!selectedfileslength||isReadOnlyUser} + disabled={!selectedfileslength || isReadOnlyUser} className='ml-0.5' label='Delete Files' size={isTablet ? 
'small' : 'medium'} From ba6a9d2834b7fa23f3f70bf5b31b675f999af5d3 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:59:21 +0530 Subject: [PATCH 132/292] Langchain libs update (#769) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes --- backend/requirements.txt | 33 ++++++++-------- backend/src/llm.py | 10 +++-- backend/src/main.py | 16 ++++---- backend/src/shared/constants.py | 52 +++++++++++++++++++++++-- backend/src/shared/schema_extraction.py | 3 +- frontend/src/utils/Constants.ts | 3 +- 6 files changed, 83 insertions(+), 34 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 158458ce1..30b767939 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -69,22 +69,22 @@ jsonpath-python==1.0.6 jsonpointer==2.4 json-repair==0.25.2 kiwisolver==1.4.5 -langchain -langchain-aws -langchain-anthropic -langchain-fireworks -langchain-google-genai -langchain-community -langchain-core -langchain-experimental -langchain-google-vertexai -langchain-groq -langchain-openai -langchain-text-splitters +langchain==0.3.0 +langchain-aws==0.2.1 +langchain-anthropic==0.2.1 +langchain-fireworks==0.2.0 +langchain-google-genai==2.0.0 +langchain-community==0.3.0 +langchain-core==0.3.5 +langchain-experimental==0.3.1 +langchain-google-vertexai==2.0.1 +langchain-groq==0.2.0 +langchain-openai==0.2.0 +langchain-text-splitters==0.3.0 langdetect==1.0.9 -langsmith==0.1.83 +langsmith==0.1.128 layoutparser==0.3.4 -langserve==0.2.2 +langserve==0.3.0 #langchain-cli==0.0.25 lxml==5.1.0 MarkupSafe==2.1.5 @@ -100,7 +100,7 @@ numpy==1.26.4 omegaconf==2.3.0 onnx==1.16.1 onnxruntime==1.18.1 -openai==1.35.10 +openai==1.47.1 opencv-python==4.8.0.76 orjson==3.9.15 packaging==23.2 @@ -144,7 +144,6 @@ shapely==2.0.3 six==1.16.0 sniffio==1.3.1 soupsieve==2.5 -SQLAlchemy==2.0.28 starlette==0.37.2 sse-starlette==2.1.2 starlette-session==0.4.3 @@ 
-159,7 +158,7 @@ transformers==4.42.3 types-protobuf types-requests typing-inspect==0.9.0 -typing_extensions==4.9.0 +typing_extensions==4.12.2 tzdata==2024.1 unstructured==0.14.9 unstructured-client==0.23.8 diff --git a/backend/src/llm.py b/backend/src/llm.py index 505bb89fb..c2335685f 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -9,13 +9,14 @@ import concurrent.futures from concurrent.futures import ThreadPoolExecutor from langchain_experimental.graph_transformers import LLMGraphTransformer +from langchain_core.prompts import ChatPromptTemplate from langchain_anthropic import ChatAnthropic from langchain_fireworks import ChatFireworks from langchain_aws import ChatBedrock from langchain_community.chat_models import ChatOllama import boto3 import google.auth -from src.shared.constants import MODEL_VERSIONS +from src.shared.constants import MODEL_VERSIONS, PROMPT_TO_ALL_LLMs def get_llm(model: str): @@ -28,7 +29,7 @@ def get_llm(model: str): model_name = MODEL_VERSIONS[model] llm = ChatVertexAI( model_name=model_name, - convert_system_message_to_human=True, + #convert_system_message_to_human=True, credentials=credentials, project=project_id, temperature=0, @@ -149,8 +150,9 @@ def get_graph_document_list( if "diffbot_api_key" in dir(llm): llm_transformer = llm else: - if "get_name" in dir(llm) and llm.get_name() == "ChatOllama": + if "get_name" in dir(llm) and llm.get_name() != "ChatOenAI" or llm.get_name() != "ChatVertexAI" or llm.get_name() != "AzureChatOpenAI": node_properties = False + relationship_properties = False else: node_properties = ["description"] relationship_properties = ["description"] @@ -160,6 +162,8 @@ def get_graph_document_list( relationship_properties=relationship_properties, allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, + ignore_tool_usage=True, + #prompt = ChatPromptTemplate.from_messages(["system",PROMPT_TO_ALL_LLMs]) ) with ThreadPoolExecutor(max_workers=10) as executor: for chunk in 
combined_chunk_document_list: diff --git a/backend/src/main.py b/backend/src/main.py index 9e142a3cb..41942d500 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -433,14 +433,14 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab node_type= node.type if (node_id, node_type) not in distinct_nodes: distinct_nodes.add((node_id, node_type)) - #get all relations - for relation in graph_document.relationships: - relations.append(relation.type) - - node_count += len(distinct_nodes) - rel_count += len(relations) - print(f'node count internal func:{node_count}') - print(f'relation count internal func:{rel_count}') + #get all relations + for relation in graph_document.relationships: + relations.append(relation.type) + + node_count += len(distinct_nodes) + rel_count += len(relations) + print(f'node count internal func:{node_count}') + print(f'relation count internal func:{rel_count}') return node_count,rel_count def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index bbf579520..f76812553 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,7 +1,8 @@ MODEL_VERSIONS = { "openai-gpt-3.5": "gpt-3.5-turbo-0125", "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-preview-0514", + "gemini-1.5-pro": "gemini-1.5-pro-002", + "gemini-1.5-flash": "gemini-1.5-flash-002", "openai-gpt-4": "gpt-4-turbo-2024-04-09", "diffbot" : "gpt-4-turbo-2024-04-09", "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", @@ -9,7 +10,7 @@ "groq-llama3" : "llama3-70b-8192" } OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] -GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro"] +GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"] GROQ_MODELS = ["groq-llama3"] BUCKET_UPLOAD = 'llm-graph-builder-upload' BUCKET_FAILED_FILE = 'llm-graph-builder-failed' @@ -92,14 +93,14 @@ 
CHAT_DOC_SPLIT_SIZE = 3000 CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 CHAT_TOKEN_CUT_OFF = { - ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, + ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, ("ollama_llama3") : 2 } CHAT_TOKEN_CUT_OFF = { - ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, + ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro", "gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, ("ollama_llama3") : 2 } @@ -476,3 +477,46 @@ START_FROM_BEGINNING = "start_from_beginning" DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning" START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position" + +PROMPT_TO_ALL_LLMs = """ +"# Knowledge Graph Instructions for LLMs\n" + "## 1. Overview\n" + "You are a top-tier algorithm designed for extracting information in structured " + "formats to build a knowledge graph.\n" + "Try to capture as much information from the text as possible without " + "sacrificing accuracy. Do not add any information that is not explicitly " + "mentioned in the text.\n" + "- **Nodes** represent entities and concepts.\n" + "- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n" + "accessible for a vast audience.\n" + "## 2. 
Labeling Nodes\n" + "- **Consistency**: Ensure you use available types for node labels.\n" + "Ensure you use basic or elementary types for node labels.\n" + "- For example, when you identify an entity representing a person, " + "always label it as **'person'**. Avoid using more specific terms " + "like 'mathematician' or 'scientist'." + "- **Node IDs**: Never utilize integers as node IDs. Node IDs should be " + "names or human-readable identifiers found in the text.\n" + "- **Relationships** represent connections between entities or concepts.\n" + "Ensure consistency and generality in relationship types when constructing " + "knowledge graphs. Instead of using specific and momentary types " + "such as 'BECAME_PROFESSOR', use more general and timeless relationship types " + "like 'PROFESSOR'. Make sure to use general and timeless relationship types!\n" + "## 3. Coreference Resolution\n" + "- **Maintain Entity Consistency**: When extracting entities, it's vital to " + "ensure consistency.\n" + 'If an entity, such as "John Doe", is mentioned multiple times in the text ' + 'but is referred to by different names or pronouns (e.g., "Joe", "he"),' + "always use the most complete identifier for that entity throughout the " + 'knowledge graph. In this example, use "John Doe" as the entity ID.\n' + "Remember, the knowledge graph should be coherent and easily understandable, " + "so maintaining consistency in entity references is crucial.\n" + "## 4. Node Properties\n" + "- Dates, URLs, Time, and Numerical Values: Instead of creating separate nodes for + these elements, represent them as properties of existing nodes." + "- Example: Instead of creating a node labeled "2023-03-15" and connecting it to another node + with the relationship "BORN_ON", add a property called "born_on" to the person node with the + value "2023-03-15"." + "## 5. Strict Compliance\n" + "Adhere to the rules strictly. Non-compliance will result in termination." 
+ """ \ No newline at end of file diff --git a/backend/src/shared/schema_extraction.py b/backend/src/shared/schema_extraction.py index 27008acae..80954ba65 100644 --- a/backend/src/shared/schema_extraction.py +++ b/backend/src/shared/schema_extraction.py @@ -1,5 +1,6 @@ from typing import List -from langchain_core.pydantic_v1 import BaseModel, Field +#from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic.v1 import BaseModel, Field from src.llm import get_llm from src.shared.constants import MODEL_VERSIONS from langchain_core.prompts import ChatPromptTemplate diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index cf37e9313..08948ba87 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -45,12 +45,13 @@ export const llms = 'openai-gpt-4o-mini', 'gemini-1.0-pro', 'gemini-1.5-pro', + 'gemini-1.5-flash', 'azure_ai_gpt_35', 'azure_ai_gpt_4o', 'ollama_llama3', 'groq_llama3_70b', 'anthropic_claude_3_5_sonnet', - 'fireworks_v3p1_405b', + 'fireworks_llama_v3p2_90b', 'bedrock_claude_3_5_sonnet', ]; From 3c2e3448598750e943b545f0947460a2658e7f2c Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 08:41:00 +0000 Subject: [PATCH 133/292] fixed the rerendering of the table while file status is processing --- frontend/src/hooks/useSse.tsx | 37 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index 8b063751c..dadf4d184 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx @@ -8,6 +8,7 @@ export default function useServerSideEvent( errorHandler: (filename: string) => void ) { const { setFilesData, setProcessedCount } = useFileContext(); + const processcountmap: Record = {}; function updateStatusForLargeFiles(eventSourceRes: eventResponsetypes) { const { fileName, @@ -28,22 +29,26 @@ export default 
function useServerSideEvent( alertHandler(minutes !== 0, minutes === 0 ? seconds : minutes, fileName); } if (total_chunks) { - setFilesData((prevfiles) => { - return prevfiles.map((curfile) => { - if (curfile.name == fileName) { - return { - ...curfile, - status: total_chunks === processed_chunk ? 'Completed' : status, - NodesCount: nodeCount, - relationshipCount: relationshipCount, - model: model, - processing: processingTime?.toFixed(2), - processingProgress: Math.floor((processed_chunk / total_chunks) * 100), - }; - } - return curfile; + const updateState = processed_chunk != processcountmap[fileName]; + if (updateState) { + processcountmap[fileName] = processed_chunk; + setFilesData((prevfiles) => { + return prevfiles.map((curfile) => { + if (curfile.name == fileName) { + return { + ...curfile, + status: total_chunks === processed_chunk ? 'Completed' : status, + NodesCount: nodeCount, + relationshipCount: relationshipCount, + model: model, + processing: processingTime?.toFixed(2), + processingProgress: Math.floor((processed_chunk / total_chunks) * 100), + }; + } + return curfile; + }); }); - }); + } } } else if (status === 'Completed') { setFilesData((prevfiles) => { @@ -67,6 +72,7 @@ export default function useServerSideEvent( } return prev + 1; }); + delete processcountmap[fileName]; } else if (eventSourceRes.status === 'Failed') { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { @@ -80,6 +86,7 @@ export default function useServerSideEvent( }); }); errorHandler(fileName); + delete processcountmap[fileName]; } } return { From 0d93f57e8b2fb2477daa15e4dd4b8b49d934d035 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 08:50:27 +0000 Subject: [PATCH 134/292] fix: Read Only User Fix --- frontend/src/components/Content.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 
e88ff72a3..0995426c1 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -204,7 +204,7 @@ const Content: React.FC = ({ setGdsActive(response.data.data.gds_status); } if (response.data.data.write_access !== undefined) { - setIsReadOnlyUser(response.data.data.write_access); + setIsReadOnlyUser(!response.data.data.write_access); } if ( (response.data.data.application_dimension === response.data.data.db_vector_dimension || From dc994a2a5f1e0807b41a0ad0348551262ab22999 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:32:57 +0530 Subject: [PATCH 135/292] Global search fulltext (#767) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * Communities bug solutions (#770) * added local chat history * added write access check * added write access param * labels cahnge for nodes * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * enable communities * removed the selected prop * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * enable communities label change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * readonly fixed on refresh * clear chat history * slectedFiles check for Chatbot * clear history --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> --- backend/score.py | 9 +- backend/src/QA_integration.py | 86 ++-- backend/src/chunkid_entities.py | 104 ++-- backend/src/shared/constants.py | 444 +++++++++++++----- .../src/components/ChatBot/ChatInfoModal.tsx | 89 ++-- .../src/components/ChatBot/ChatModeToggle.tsx | 40 +- frontend/src/components/ChatBot/Chatbot.tsx | 65 ++- .../src/components/ChatBot/Communities.tsx | 5 + frontend/src/components/Content.tsx | 37 +- .../components/Graph/CheckboxSelection.tsx | 20 +- .../src/components/Graph/GraphViewModal.tsx | 34 +- frontend/src/components/Layout/PageLayout.tsx | 3 +- frontend/src/components/Layout/SideNav.tsx | 1 - .../PostProcessingCheckList/index.tsx | 2 +- frontend/src/context/UserMessages.tsx | 3 + frontend/src/context/UsersFiles.tsx | 47 +- frontend/src/services/ChunkEntitiesInfo.ts | 12 +- frontend/src/types.ts | 78 ++- frontend/src/utils/Constants.ts | 9 +- frontend/src/utils/Utils.ts | 13 + 20 files changed, 709 insertions(+), 392 deletions(-) diff --git a/backend/score.py b/backend/score.py index 92a573aeb..a9e8e0b93 100644 --- a/backend/score.py +++ b/backend/score.py @@ -271,8 +271,8 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') - - if "create_communities" in tasks: + + 
if "enable_communities" in tasks: model = "openai-gpt-4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} @@ -321,10 +321,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()): +async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()): try: - logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower())) + result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 868508815..169d2b8c3 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -147,7 +147,6 @@ def get_sources_and_chunks(sources_used, docs): result = { 'sources': sources_used, 'chunkdetails': chunkdetails_list, - "entities" : list() } return result @@ -182,16 +181,19 @@ def format_documents(documents, model): sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True) sorted_documents = sorted_documents[:prompt_token_cutoff] - formatted_docs = [] + formatted_docs = list() sources = set() - 
lc_entities = {'entities':list()} + entities = dict() + global_communities = list() + for doc in sorted_documents: try: source = doc.metadata.get('source', "unknown") sources.add(source) - lc_entities = doc.metadata if 'entities'in doc.metadata.keys() else lc_entities + entities = doc.metadata['entities'] if 'entities'in doc.metadata.keys() else entities + global_communities = doc.metadata["communitydetails"] if 'communitydetails'in doc.metadata.keys() else global_communities formatted_doc = ( "Document start\n" @@ -204,13 +206,13 @@ def format_documents(documents, model): except Exception as e: logging.error(f"Error formatting document: {e}") - return "\n\n".join(formatted_docs), sources,lc_entities + return "\n\n".join(formatted_docs), sources,entities,global_communities def process_documents(docs, question, messages, llm, model,chat_mode_settings): start_time = time.time() try: - formatted_docs, sources,lc_entities = format_documents(docs, model) + formatted_docs, sources, entitydetails, communities = format_documents(docs, model) rag_chain = get_rag_chain(llm=llm) @@ -219,12 +221,25 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings): "context": formatted_docs, "input": question }) - if chat_mode_settings["mode"] == "entity search+vector": - result = {'sources': list(), - 'chunkdetails': list()} - result.update(lc_entities) + + result = {'sources': list(), 'nodedetails': dict(), 'entities': dict()} + node_details = {"chunkdetails":list(),"entitydetails":list(),"communitydetails":list()} + entities = {'entityids':list(),"relationshipids":list()} + + if chat_mode_settings["mode"] == CHAT_ENTITY_VECTOR_MODE: + node_details["entitydetails"] = entitydetails + + elif chat_mode_settings["mode"] == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE: + node_details["communitydetails"] = communities else: - result = get_sources_and_chunks(sources, docs) + sources_and_chunks = get_sources_and_chunks(sources, docs) + result['sources'] = 
sources_and_chunks['sources'] + node_details["chunkdetails"] = sources_and_chunks["chunkdetails"] + entities.update(entitydetails) + + result["nodedetails"] = node_details + result["entities"] = entities + content = ai_response.content total_tokens = get_total_tokens(ai_response, llm) @@ -295,10 +310,13 @@ def create_document_retriever_chain(llm, retriever): def initialize_neo4j_vector(graph, chat_mode_settings): try: - mode = chat_mode_settings.get('mode', 'undefined') retrieval_query = chat_mode_settings.get("retrieval_query") index_name = chat_mode_settings.get("index_name") keyword_index = chat_mode_settings.get("keyword_index", "") + node_label = chat_mode_settings.get("node_label") + embedding_node_property = chat_mode_settings.get("embedding_node_property") + text_node_properties = chat_mode_settings.get("text_node_properties") + if not retrieval_query or not index_name: raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.") @@ -310,28 +328,21 @@ def initialize_neo4j_vector(graph, chat_mode_settings): retrieval_query=retrieval_query, graph=graph, search_type="hybrid", - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"], + node_label=node_label, + embedding_node_property=embedding_node_property, + text_node_properties=text_node_properties, keyword_index_name=keyword_index ) logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") - elif mode == "entity search+vector": - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph - ) else: neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, retrieval_query=retrieval_query, graph=graph, - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"] + node_label=node_label, + embedding_node_property=embedding_node_property, 
+ text_node_properties=text_node_properties ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") except Exception as e: @@ -359,12 +370,12 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_ logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}") return retriever -def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): +def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - + neo_db = initialize_neo4j_vector(graph, chat_mode_settings) document_names= list(map(str.strip, json.loads(document_names))) - search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "entity search+vector" else CHAT_SEARCH_KWARG_K + search_k = chat_mode_settings["top_k"] retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: @@ -397,12 +408,13 @@ def process_chat_response(messages, history, question, model, graph, document_na try: llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings) - docs = retrieve_documents(doc_retriever, messages) + docs = retrieve_documents(doc_retriever, messages) + if docs: content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings) else: content = "I couldn't find any relevant documents to answer your question." 
- result = {"sources": [], "chunkdetails": [], "entities": []} + result = {"sources": list(), "nodedetails": list(), "entities": list()} total_tokens = 0 ai_response = AIMessage(content=content) @@ -412,18 +424,18 @@ def process_chat_response(messages, history, question, model, graph, document_na summarization_thread.start() logging.info("Summarization thread started.") # summarize_and_log(history, messages, llm) - + return { "session_id": "", "message": content, "info": { "sources": result["sources"], "model": model_version, - "chunkdetails": result["chunkdetails"], + "nodedetails": result["nodedetails"], "total_tokens": total_tokens, "response_time": 0, "mode": chat_mode_settings["mode"], - "entities": result["entities"] + "entities": result["entities"], }, "user": "chatbot" } @@ -435,12 +447,12 @@ def process_chat_response(messages, history, question, model, graph, document_na "message": "Something went wrong", "info": { "sources": [], - "chunkdetails": [], + "nodedetails": [], "total_tokens": 0, "response_time": 0, "error": f"{type(e).__name__}: {str(e)}", "mode": chat_mode_settings["mode"], - "entities": [] + "entities": [], }, "user": "chatbot" } @@ -593,7 +605,7 @@ def create_neo4j_chat_message_history(graph, session_id, write_access=True): raise def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): - default_settings = settings_map["default"] + default_settings = settings_map[CHAT_DEFAULT_MODE] try: chat_mode_settings = settings_map.get(mode, default_settings) chat_mode_settings["mode"] = mode @@ -615,7 +627,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access user_question = HumanMessage(content=question) messages.append(user_question) - if mode == "graph": + if mode == CHAT_GRAPH_MODE: result = process_graph_response(model, graph, question, messages, history) else: chat_mode_settings = get_chat_mode_settings(mode=mode) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 
aa8d1d4ca..8bb9c2198 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -81,16 +81,16 @@ def process_chunk_data(chunk_data): except Exception as e: logging.error(f"chunkid_entities module: An error occurred while extracting the Chunk text from records: {e}") -def process_chunkids(driver, chunk_ids): +def process_chunkids(driver, chunk_ids, entities): """ Processes chunk IDs to retrieve chunk data. """ try: logging.info(f"Starting graph query process for chunk ids: {chunk_ids}") - chunk_ids_list = chunk_ids.split(",") - - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids,entityIds=entities["entityids"], relationshipIds=entities["relationshipids"]) result = process_records(records) + result["nodes"].extend(records[0]["nodes"]) + result["nodes"] = remove_duplicate_nodes(result["nodes"]) logging.info(f"Nodes and relationships are processed") result["chunk_data"] = process_chunk_data(records) @@ -118,13 +118,12 @@ def remove_duplicate_nodes(nodes,property="element_id"): return unique_nodes -def process_entityids(driver, chunk_ids): +def process_entityids(driver, entity_ids): """ Processes entity IDs to retrieve local community data. 
""" try: - logging.info(f"Starting graph query process for entity ids: {chunk_ids}") - entity_ids_list = chunk_ids.split(",") + logging.info(f"Starting graph query process for entity ids: {entity_ids}") query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format( topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, @@ -132,65 +131,82 @@ def process_entityids(driver, chunk_ids): ) query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX + query_body + LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX - records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) + records, summary, keys = driver.execute_query(query, entityIds=entity_ids) result = process_records(records) if records: result["nodes"].extend(records[0]["nodes"]) result["nodes"] = remove_duplicate_nodes(result["nodes"]) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = records[0]["chunks"] result["community_data"] = records[0]["communities"] else: result["chunk_data"] = list() result["community_data"] = list() - logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + logging.info(f"Query process completed successfully for chunk ids: {entity_ids}") return result except Exception as e: - logging.error(f"chunkid_entities module: Error processing entity ids: {chunk_ids}. Error: {e}") + logging.error(f"chunkid_entities module: Error processing entity ids: {entity_ids}. Error: {e}") raise -def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False): - """ - Retrieve and process nodes and relationships from a graph database given a list of chunk IDs. 
+def process_communityids(driver, community_ids): + """Processes community IDs to retrieve community data.""" + try: + logging.info(f"Starting graph query process for community ids: {community_ids}") + query = GLOBAL_COMMUNITY_DETAILS_QUERY + records, summary, keys = driver.execute_query(query, communityids=community_ids) + + result = {"nodes": [], "relationships": [], "chunk_data": []} + result["community_data"] = records[0]["communities"] if records else [] - Parameters: - uri (str): The URI of the graph database. - username (str): The username for the database authentication. - password (str): The password for the database authentication. - chunk_ids (str): A comma-separated string of chunk IDs. + logging.info(f"Query process completed successfully for community ids: {community_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing community ids: {community_ids}. Error: {e}") + raise - Returns: - dict: A dictionary with 'nodes' and 'relationships' keys containing processed data, or an error message. 
- """ +def get_entities_from_chunkids(uri, username, password, database ,nodedetails,entities,mode): try: driver = get_graphDB_driver(uri, username, password,database) - if not is_entity: - if chunk_ids: - logging.info(f"chunkid_entities module: Starting for chunk ids : {chunk_ids}") - result = process_chunkids(driver,chunk_ids) + default_response = {"nodes": list(),"relationships": list(),"chunk_data": list(),"community_data": list(),} + + nodedetails = json.loads(nodedetails) + entities = json.loads(entities) + + if mode == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE: + + if "communitydetails" in nodedetails and nodedetails["communitydetails"]: + community_ids = [item["id"] for item in nodedetails["communitydetails"]] + logging.info(f"chunkid_entities module: Starting for community ids: {community_ids}") + return process_communityids(driver, community_ids) + else: + logging.info("chunkid_entities module: No community ids are passed") + return default_response + + elif mode == CHAT_ENTITY_VECTOR_MODE: + + if "entitydetails" in nodedetails and nodedetails["entitydetails"]: + entity_ids = [item["id"] for item in nodedetails["entitydetails"]] + logging.info(f"chunkid_entities module: Starting for entity ids: {entity_ids}") + return process_entityids(driver, entity_ids) else: - logging.info(f"chunkid_entities module: No chunk ids are passed") - result = { - "nodes": [], - "relationships": [], - "chunk_data":[] - } - return result - if chunk_ids: - result = process_entityids(driver,chunk_ids) - logging.info(f"chunkid_entities module: Starting for entity ids : {chunk_ids}") + logging.info("chunkid_entities module: No entity ids are passed") + return default_response + else: - logging.info(f"chunkid_entities module: No entity ids are passed") - result = { - "nodes": [], - "relationships": [], - "chunk_data":[], - "community_data":[] - } - return result + + if "chunkdetails" in nodedetails and nodedetails["chunkdetails"]: + chunk_ids = [item["id"] for item in 
nodedetails["chunkdetails"]] + logging.info(f"chunkid_entities module: Starting for chunk ids: {chunk_ids}") + return process_chunkids(driver, chunk_ids, entities) + else: + logging.info("chunkid_entities module: No chunk ids are passed") + return default_response except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") - raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e \ No newline at end of file + raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e + diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index f76812553..834459fd1 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -22,82 +22,150 @@ GRAPH_QUERY = """ MATCH docs = (d:Document) WHERE d.fileName IN $document_names -WITH docs, d ORDER BY d.createdAt DESC -// fetch chunks for documents, currently with limit +WITH docs, d +ORDER BY d.createdAt DESC + +// Fetch chunks for documents, currently with limit CALL {{ WITH d - OPTIONAL MATCH chunks=(d)<-[:PART_OF|FIRST_CHUNK]-(c:Chunk) + OPTIONAL MATCH chunks = (d)<-[:PART_OF|FIRST_CHUNK]-(c:Chunk) RETURN c, chunks LIMIT {graph_chunk_limit} }} -WITH collect(distinct docs) as docs, collect(distinct chunks) as chunks, collect(distinct c) as selectedChunks -WITH docs, chunks, selectedChunks -// select relationships between selected chunks +WITH collect(distinct docs) AS docs, + collect(distinct chunks) AS chunks, + collect(distinct c) AS selectedChunks + +// Select relationships between selected chunks WITH *, -[ c in selectedChunks | [p=(c)-[:NEXT_CHUNK|SIMILAR]-(other) WHERE other IN selectedChunks | p]] as chunkRels + [c IN selectedChunks | + [p = (c)-[:NEXT_CHUNK|SIMILAR]-(other) + WHERE other IN selectedChunks | p]] AS chunkRels -// fetch entities and 
relationships between entities +// Fetch entities and relationships between entities CALL {{ WITH selectedChunks - UNWIND selectedChunks as c - - OPTIONAL MATCH entities=(c:Chunk)-[:HAS_ENTITY]->(e) - OPTIONAL MATCH entityRels=(e)--(e2:!Chunk) WHERE exists {{ + UNWIND selectedChunks AS c + OPTIONAL MATCH entities = (c:Chunk)-[:HAS_ENTITY]->(e) + OPTIONAL MATCH entityRels = (e)--(e2:!Chunk) + WHERE exists {{ (e2)<-[:HAS_ENTITY]-(other) WHERE other IN selectedChunks }} - RETURN entities , entityRels, collect(DISTINCT e) as entity + RETURN entities, entityRels, collect(DISTINCT e) AS entity }} -WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels, entity + +WITH docs, chunks, chunkRels, + collect(entities) AS entities, + collect(entityRels) AS entityRels, + entity WITH * CALL {{ - with entity - unwind entity as n - OPTIONAL MATCH community=(n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) - OPTIONAL MATCH parentcommunity=(p)-[:PARENT_COMMUNITY*]->(p2:__Community__) - return collect(community) as communities , collect(parentcommunity) as parentCommunities + WITH entity + UNWIND entity AS n + OPTIONAL MATCH community = (n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) + OPTIONAL MATCH parentcommunity = (p)-[:PARENT_COMMUNITY*]->(p2:__Community__) + RETURN collect(community) AS communities, + collect(parentcommunity) AS parentCommunities +}} + +WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) AS paths + +// Distinct nodes and relationships +CALL {{ + WITH paths + UNWIND paths AS path + UNWIND nodes(path) AS node + WITH distinct node + RETURN collect(node /* {{.*, labels:labels(node), elementId:elementId(node), embedding:null, text:null}} */) AS nodes }} -WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) as paths +CALL {{ + WITH paths + UNWIND paths AS path + UNWIND relationships(path) AS rel + RETURN 
collect(distinct rel) AS rels +}} -// distinct nodes and rels -CALL {{ WITH paths UNWIND paths AS path UNWIND nodes(path) as node WITH distinct node - RETURN collect(node /* {{.*, labels:labels(node), elementId:elementId(node), embedding:null, text:null}} */) AS nodes }} -CALL {{ WITH paths UNWIND paths AS path UNWIND relationships(path) as rel RETURN collect(distinct rel) AS rels }} RETURN nodes, rels """ CHUNK_QUERY = """ -match (chunk:Chunk) where chunk.id IN $chunksIds - +MATCH (chunk:Chunk) +WHERE chunk.id IN $chunksIds MATCH (chunk)-[:PART_OF]->(d:Document) -CALL {WITH chunk -MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document &! `__Community__`) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, - [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, - endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, - relationship: {type:type(r), element_id:elementId(r)}}] as entities + +WITH d, + collect(distinct chunk) AS chunks + +// Collect relationships and nodes +WITH d, chunks, + collect { + MATCH ()-[r]->() + WHERE elementId(r) IN $relationshipIds + RETURN r + } AS rels, + collect { + MATCH (e) + WHERE elementId(e) IN $entityIds + RETURN e + } AS nodes + +WITH d, + chunks, + apoc.coll.toSet(apoc.coll.flatten(rels)) AS rels, + nodes + +RETURN + d AS doc, + [chunk IN chunks | + chunk {.*, embedding: null} + ] AS chunks, + [ + node IN nodes | + { + element_id: elementId(node), + labels: labels(node), + properties: { + id: node.id, + description: node.description + } + } + ] AS nodes, + [ + r IN rels | + { + startNode: { + element_id: 
elementId(startNode(r)), + labels: labels(startNode(r)), + properties: { + id: startNode(r).id, + description: startNode(r).description + } + }, + endNode: { + element_id: elementId(endNode(r)), + labels: labels(endNode(r)), + properties: { + id: endNode(r).id, + description: endNode(r).description + } + }, + relationship: { + type: type(r), + element_id: elementId(r) + } + } + ] AS entities """ ## CHAT SETUP CHAT_MAX_TOKENS = 1000 -CHAT_SEARCH_KWARG_K = 10 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 CHAT_DOC_SPLIT_SIZE = 3000 CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 -CHAT_TOKEN_CUT_OFF = { - ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, - ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, - ("ollama_llama3") : 2 -} - CHAT_TOKEN_CUT_OFF = { ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro", "gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, @@ -150,85 +218,146 @@ QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." 
-## CHAT QUERIES +## CHAT QUERIES +VECTOR_SEARCH_TOP_K = 10 + VECTOR_SEARCH_QUERY = """ WITH node AS chunk, score MATCH (chunk)-[:PART_OF]->(d:Document) -WITH d, collect(distinct {chunk: chunk, score: score}) as chunks, avg(score) as avg_score +WITH d, + collect(distinct {chunk: chunk, score: score}) AS chunks, + avg(score) AS avg_score + WITH d, avg_score, - [c in chunks | c.chunk.text] as texts, - [c in chunks | {id: c.chunk.id, score: c.score}] as chunkdetails -WITH d, avg_score, chunkdetails, - apoc.text.join(texts, "\n----\n") as text -RETURN text, avg_score AS score, - {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} as metadata + [c IN chunks | c.chunk.text] AS texts, + [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails + +WITH d, avg_score, chunkdetails, + apoc.text.join(texts, "\n----\n") AS text + +RETURN text, + avg_score AS score, + {source: COALESCE(CASE WHEN d.url CONTAINS "None" + THEN d.fileName + ELSE d.url + END, + d.fileName), + chunkdetails: chunkdetails} AS metadata """ +### Vector graph search VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25 +VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH = 0.3 +VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH = 0.9 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE = 10 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE = 25 -VECTOR_GRAPH_SEARCH_QUERY = """ +VECTOR_GRAPH_SEARCH_QUERY_PREFIX = """ WITH node as chunk, score // find the document of the chunk MATCH (chunk)-[:PART_OF]->(d:Document) - // aggregate chunk-details -WITH d, collect(DISTINCT {{chunk: chunk, score: score}}) AS chunks, avg(score) as avg_score +WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score // fetch entities -CALL {{ WITH chunks +CALL { WITH chunks UNWIND chunks as chunkScore WITH chunkScore.chunk as chunk -// entities connected to the chunk -// todo only return entities that are actually in the chunk, remember we connect all extracted entities to 
all chunks -// todo sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes? -OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e) -WITH e, count(*) as numChunks -ORDER BY numChunks DESC LIMIT {no_of_entites} -// depending on match to query embedding either 1 or 2 step expansion -WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95 -THEN -collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document) RETURN path }} -ELSE -collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document) RETURN path }} -END as paths, e -WITH apoc.coll.toSet(apoc.coll.flatten(collect(distinct paths))) as paths, collect(distinct e) as entities -// de-duplicate nodes and relationships across chunks -RETURN collect{{ unwind paths as p unwind relationships(p) as r return distinct r}} as rels, -collect{{ unwind paths as p unwind nodes(p) as n return distinct n}} as nodes, entities -}} +""" -// generate metadata and text components for chunks, nodes and relationships +VECTOR_GRAPH_SEARCH_ENTITY_QUERY = """ + OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e) + WITH e, count(*) AS numChunks + ORDER BY numChunks DESC + LIMIT {no_of_entites} + + WITH + CASE + WHEN e.embedding IS NULL OR ({embedding_match_min} <= vector.similarity.cosine($embedding, e.embedding) AND vector.similarity.cosine($embedding, e.embedding) <= {embedding_match_max}) THEN + collect {{ + OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document&!__Community__) + RETURN path LIMIT {entity_limit_minmax_case} + }} + WHEN e.embedding IS NOT NULL AND vector.similarity.cosine($embedding, e.embedding) > {embedding_match_max} THEN + collect {{ + OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document&!__Community__) + RETURN path LIMIT {entity_limit_max_case} + }} + ELSE + collect {{ + MATCH path=(e) + RETURN path + }} + END AS paths, e +""" + +VECTOR_GRAPH_SEARCH_QUERY_SUFFIX = """ + 
WITH apoc.coll.toSet(apoc.coll.flatten(collect(DISTINCT paths))) AS paths, + collect(DISTINCT e) AS entities + + // De-duplicate nodes and relationships across chunks + RETURN + collect { + UNWIND paths AS p + UNWIND relationships(p) AS r + RETURN DISTINCT r + } AS rels, + collect { + UNWIND paths AS p + UNWIND nodes(p) AS n + RETURN DISTINCT n + } AS nodes, + entities +} + +// Generate metadata and text components for chunks, nodes, and relationships WITH d, avg_score, [c IN chunks | c.chunk.text] AS texts, - [c IN chunks | {{id: c.chunk.id, score: c.score}}] AS chunkdetails, - apoc.coll.sort([n in nodes | - -coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+ -n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts, - apoc.coll.sort([r in rels - // optional filter if we limit the node-set - // WHERE startNode(r) in nodes AND endNode(r) in nodes - | -coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ -startNode(r).id + -" " + type(r) + " " + -coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id -]) as relTexts -, entities -// combine texts into response-text - -WITH d, avg_score,chunkdetails, -"Text Content:\\n" + -apoc.text.join(texts,"\\n----\\n") + -"\\n----\\nEntities:\\n"+ -apoc.text.join(nodeTexts,"\\n") + -"\\n----\\nRelationships:\\n" + -apoc.text.join(relTexts,"\\n") - -as text,entities - -RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata + [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, + [n IN nodes | elementId(n)] AS entityIds, + [r IN rels | elementId(r)] AS relIds, + apoc.coll.sort([ + n IN nodes | + coalesce(apoc.coll.removeAll(labels(n), ['__Entity__'])[0], "") + ":" + + n.id + + (CASE WHEN n.description IS NOT NULL THEN " (" + n.description + ")" ELSE "" END) + ]) AS 
nodeTexts, + apoc.coll.sort([ + r IN rels | + coalesce(apoc.coll.removeAll(labels(startNode(r)), ['__Entity__'])[0], "") + ":" + + startNode(r).id + " " + type(r) + " " + + coalesce(apoc.coll.removeAll(labels(endNode(r)), ['__Entity__'])[0], "") + ":" + endNode(r).id + ]) AS relTexts, + entities + +// Combine texts into response text +WITH d, avg_score, chunkdetails, entityIds, relIds, + "Text Content:\n" + apoc.text.join(texts, "\n----\n") + + "\n----\nEntities:\n" + apoc.text.join(nodeTexts, "\n") + + "\n----\nRelationships:\n" + apoc.text.join(relTexts, "\n") AS text, + entities + +RETURN + text, + avg_score AS score, + { + length: size(text), + source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), + chunkdetails: chunkdetails, + entities : { + entityids: entityIds, + relationshipids: relIds + } + } AS metadata """ +VECTOR_GRAPH_SEARCH_QUERY = VECTOR_GRAPH_SEARCH_QUERY_PREFIX+ VECTOR_GRAPH_SEARCH_ENTITY_QUERY.format( + no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT, + embedding_match_min=VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH, + embedding_match_max=VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH, + entity_limit_minmax_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE, + entity_limit_max_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE +) + VECTOR_GRAPH_SEARCH_QUERY_SUFFIX + ### Local community search LOCAL_COMMUNITY_TOP_K = 10 LOCAL_COMMUNITY_TOP_CHUNKS = 3 @@ -392,45 +521,112 @@ ] AS entities """ +LOCAL_COMMUNITY_SEARCH_QUERY_FORMATTED = LOCAL_COMMUNITY_SEARCH_QUERY.format( + topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX + +GLOBAL_SEARCH_TOP_K = 10 + +GLOBAL_VECTOR_SEARCH_QUERY = """ +WITH collect(distinct {community: node, score: score}) AS communities, + avg(score) AS avg_score + +WITH avg_score, + [c IN communities | c.community.summary] AS texts, + [c IN communities | {id: elementId(c.community), 
score: c.score}] AS communityDetails + +WITH avg_score, communityDetails, + apoc.text.join(texts, "\n----\n") AS text + +RETURN text, + avg_score AS score, + {communitydetails: communityDetails} AS metadata +""" + + + +GLOBAL_COMMUNITY_DETAILS_QUERY = """ +MATCH (community:__Community__) +WHERE elementId(community) IN $communityids +WITH collect(distinct community) AS communities +RETURN [community IN communities | + community {.*, embedding: null, elementid: elementId(community)}] AS communities +""" + +## CHAT MODES + +CHAT_VECTOR_MODE = "vector" +CHAT_FULLTEXT_MODE = "fulltext" +CHAT_ENTITY_VECTOR_MODE = "entity search+vector" +CHAT_VECTOR_GRAPH_MODE = "graph+vector" +CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph+vector+fulltext" +CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global search+vector+fulltext" +CHAT_GRAPH_MODE = "graph" +CHAT_DEFAULT_MODE = "graph+vector+fulltext" + CHAT_MODE_CONFIG_MAP= { - "vector": { + CHAT_VECTOR_MODE : { "retrieval_query": VECTOR_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": None, - "document_filter": True + "document_filter": True, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], + }, - "fulltext": { + CHAT_FULLTEXT_MODE : { "retrieval_query": VECTOR_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": "keyword", - "document_filter": False + "document_filter": False, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], }, - "entity search+vector": { - "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, - topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, - topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX, + CHAT_ENTITY_VECTOR_MODE : { + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY_FORMATTED, + "top_k": LOCAL_COMMUNITY_TOP_K, "index_name": "entity_vector", "keyword_index": None, - 
"document_filter": False + "document_filter": False, + "node_label": "__Entity__", + "embedding_node_property":"embedding", + "text_node_properties":["id"], }, - "graph+vector": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + CHAT_VECTOR_GRAPH_MODE : { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": None, - "document_filter": True + "document_filter": True, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], }, - "graph+vector+fulltext": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + CHAT_VECTOR_GRAPH_FULLTEXT_MODE : { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": "keyword", - "document_filter": False + "document_filter": False, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], + }, + CHAT_GLOBAL_VECTOR_FULLTEXT_MODE : { + "retrieval_query": GLOBAL_VECTOR_SEARCH_QUERY, + "top_k": GLOBAL_SEARCH_TOP_K, + "index_name": "community_vector", + "keyword_index": "community_keyword", + "document_filter": False, + "node_label": "__Community__", + "embedding_node_property":"embedding", + "text_node_properties":["summary"], }, - "default": { - "retrieval_query": VECTOR_SEARCH_QUERY, - "index_name": "vector", - "keyword_index": None, - "document_filter": True - } } YOUTUBE_CHUNK_SIZE_SECONDS = 60 diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 74823520a..568868000 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -38,15 +38,24 @@ const ChatInfoModal: React.FC = ({ model, total_tokens, response_time, - chunk_ids, + nodeDetails, mode, cypher_query, graphonly_entities, error, + 
entities_ids }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === chatModeLables.graph ? 4 : 3); + const [activeTab, setActiveTab] = useState( + error?.length + ? 10 + : mode === chatModeLables.global_vector + ? 7 + : mode === chatModeLables.graph + ? 4 + : 3 + ); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); @@ -86,11 +95,8 @@ const ChatInfoModal: React.FC = ({ (async () => { setLoading(true); try { - const response = await chunkEntitiesAPI( - userCredentials as UserCredentials, - chunk_ids.map((c) => c.id).join(','), - userCredentials?.database, - mode === chatModeLables.entity_vector + const response = await chunkEntitiesAPI(userCredentials as UserCredentials, userCredentials?.database, nodeDetails, entities_ids, + mode, ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -123,17 +129,27 @@ const ChatInfoModal: React.FC = ({ }) ); setRelationships(relationshipsData ?? []); - setCommunities(communitiesData ?? []); + setCommunities(communitiesData.map((community: any) => { + const communityScore = nodeDetails?.communitydetails?.find((c: any) => + c.id === community.element_id); + return { + ...community, + score: communityScore?.score || 1 + }; + }) + .sort((a: any, b: any) => b.score - a.score) + ); + setChunks( chunksData .map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return ( - { - ...chunk, - score: chunkScore?.score, - } ?? 
[] - ); + const chunkScore = nodeDetails?.chunkdetails?.find((c: any) => + c.id + === chunk.id); + return { + ...chunk, + score: chunkScore?.score + }; }) .sort((a: any, b: any) => b.score - a.score) ); @@ -147,7 +163,7 @@ const ChatInfoModal: React.FC = ({ () => { setcopiedText(false); }; - }, [chunk_ids, mode, error]); + }, [nodeDetails, mode, error]); const onChangeTabs = (tabId: number) => { setActiveTab(tabId); @@ -175,22 +191,33 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode != chatModeLables.graph ? Sources used : <>} - {mode != chatModeLables.graph ? Chunks : <>} - {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( - Top Entities used - ) : ( - <> - )} - {mode === chatModeLables.graph && cypher_query?.trim()?.length ? ( - Generated Cypher Query + {mode === chatModeLables.global_vector ? ( + // Only show the Communities tab if mode is global + Communities ) : ( - <> + <> + {mode != chatModeLables.graph ? Sources used : <>} + {mode != chatModeLables.graph ? Chunks : <>} + {mode === chatModeLables.graph_vector || + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( + Top Entities used + ) : ( + <> + )} + {mode === chatModeLables.graph && cypher_query?.trim()?.length ? ( + Generated Cypher Query + ) : ( + <> + )} + {mode === chatModeLables.entity_vector ? ( + Communities + ) : ( + <> + )} + )} - {mode === chatModeLables.entity_vector ? Communities : <>} )} @@ -217,7 +244,7 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> - {mode === chatModeLables.entity_vector ? ( + {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? 
( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 187c37178..b3d493236 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -20,30 +20,35 @@ export default function ChatModeToggle({ disableBackdrop?: boolean; }) { const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); - const isCommunityAllowed = postProcessingTasks.includes('create_communities'); + const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); useEffect(() => { - if (selectedRows.length !== 0) { - setchatMode(chatModeLables.graph_vector); - } else { - setchatMode(chatModeLables.graph_vector_fulltext); + // If rows are selected, the mode is valid (either vector or graph+vector) + if (selectedRows.length > 0) { + if (!(chatMode === chatModeLables.vector || chatMode === chatModeLables.graph_vector)) { + setchatMode(chatModeLables.graph_vector); + } } - }, [selectedRows]); - + }, [selectedRows.length, chatMode, setchatMode]); const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? 
chatModes - : chatModes?.filter((m) => !m.mode.includes(chatModeLables.entity_vector)); + : chatModes?.filter( + (m) => + !m.mode.includes(chatModeLables.entity_vector) && + !m.mode.includes(chatModeLables.global_vector) + ); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean( - selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + selectedRows.length && + !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) ); const handleModeChange = () => { if (isDisabled) { - setchatMode(chatModeLables.graph_vector); + setchatMode(chatModeLables.graph_vector); } else { setchatMode(m.mode); } @@ -52,11 +57,11 @@ export default function ChatModeToggle({ return { title: (
    - + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)}
    - {m.description} + {m.description}
    ), @@ -66,12 +71,12 @@ export default function ChatModeToggle({ {chatMode === m.mode && ( <> - {chatModeLables.selected} + {chatModeLables.selected} )} {isDisabled && ( <> - {chatModeLables.unavailableChatMode} + {chatModeLables.unavailableChatMode} )} @@ -82,10 +87,9 @@ export default function ChatModeToggle({ useEffect(() => { if (!selectedRows.length && !chatMode) { - setchatMode(chatMode); + setchatMode(chatModeLables.graph_vector_fulltext); } - }, [setchatMode, selectedRows, chatMode]); - + }, [setchatMode, selectedRows.length, chatMode]); return ( = (props) => { const [sourcesModal, setSourcesModal] = useState([]); const [modelModal, setModelModal] = useState(''); const [responseTime, setResponseTime] = useState(0); - const [chunkModal, setChunkModal] = useState([]); const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); const [copyMessageId, setCopyMessageId] = useState(null); const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); + const [entitiesModal, setEntitiesModal] = useState([]); + const [nodeDetailsModal, setNodeDetailsModal] = useState({}); const [value, copy] = useCopyToClipboard(); const { speak, cancel } = useSpeechSynthesis({ @@ -56,16 +57,10 @@ const Chatbot: FC = (props) => { setListMessages((msgs) => msgs.map((msg) => ({ ...msg, speaking: false }))); }, }); - let selectedFileNames: CustomFile[] = []; - for (let index = 0; index < selectedRows.length; index++) { - const id = selectedRows[index]; - for (let index = 0; index < filesData.length; index++) { - const f = filesData[index]; - if (f.id === id) { - selectedFileNames.push(f); - } - } - } + + let selectedFileNames: CustomFile[] = filesData.filter(f => + selectedRows.includes(f.id) && ['Completed'].includes(f.status) + ); const handleInputChange = (e: React.ChangeEvent) => { 
setInputMessage(e.target.value); @@ -82,7 +77,6 @@ const Chatbot: FC = (props) => { reply: string; sources?: string[]; model?: string; - chunk_ids?: chunk[]; total_tokens?: number; response_time?: number; speaking?: boolean; @@ -91,7 +85,8 @@ const Chatbot: FC = (props) => { cypher_query?: string; graphonly_entities?: []; error?: string; - entitiysearchonly_entities?: chunk[]; + entitiysearchonly_entities?: string[]; + nodeDetails?: nodeDetailsProps; }, index = 0 ) => { @@ -113,7 +108,6 @@ const Chatbot: FC = (props) => { isLoading: true, sources: response?.sources, model: response?.model, - chunks: response?.chunk_ids, total_tokens: response.total_tokens, response_time: response?.response_time, speaking: false, @@ -123,6 +117,7 @@ const Chatbot: FC = (props) => { graphonly_entities: response?.graphonly_entities, error: response.error, entitiysearchonly_entities: response.entitiysearchonly_entities, + nodeDetails: response?.nodeDetails }, ]); } else { @@ -136,7 +131,6 @@ const Chatbot: FC = (props) => { lastmsg.isLoading = false; lastmsg.sources = response?.sources; lastmsg.model = response?.model; - lastmsg.chunk_ids = response?.chunk_ids; lastmsg.total_tokens = response?.total_tokens; lastmsg.response_time = response?.response_time; lastmsg.speaking = false; @@ -146,6 +140,7 @@ const Chatbot: FC = (props) => { lastmsg.graphonly_entities = response.graphonly_entities; lastmsg.error = response.error; lastmsg.entities = response.entitiysearchonly_entities; + lastmsg.nodeDetails = response?.nodeDetails; return msgs.map((msg, index) => { if (index === msgs.length - 1) { return lastmsg; @@ -172,7 +167,7 @@ const Chatbot: FC = (props) => { let chatbotReply; let chatSources; let chatModel; - let chatChunks; + let chatnodedetails; let chatTimeTaken; let chatTokensUsed; let chatingMode; @@ -180,6 +175,7 @@ const Chatbot: FC = (props) => { let graphonly_entities; let error; let entitiysearchonly_entities; + let chatEntities; const datetime = `${date.toLocaleDateString()} 
${date.toLocaleTimeString()}`; const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime, mode: chatMode }; setListMessages([...listMessages, userMessage]); @@ -198,7 +194,7 @@ const Chatbot: FC = (props) => { chatbotReply = chatresponse?.data?.data?.message; chatSources = chatresponse?.data?.data?.info.sources; chatModel = chatresponse?.data?.data?.info.model; - chatChunks = chatresponse?.data?.data?.info.chunkdetails; + chatnodedetails = chatresponse?.data?.data?.info.nodedetails; chatTokensUsed = chatresponse?.data?.data?.info.total_tokens; chatTimeTaken = chatresponse?.data?.data?.info.response_time; chatingMode = chatresponse?.data?.data?.info?.mode; @@ -206,11 +202,11 @@ const Chatbot: FC = (props) => { graphonly_entities = chatresponse?.data.data.info.context ?? []; entitiysearchonly_entities = chatresponse?.data.data.info.entities; error = chatresponse.data.data.info.error ?? ''; + chatEntities = chatresponse.data.data.info.entities; const finalbotReply = { reply: chatbotReply, sources: chatSources, model: chatModel, - chunk_ids: chatChunks, total_tokens: chatTokensUsed, response_time: chatTimeTaken, speaking: false, @@ -220,6 +216,8 @@ const Chatbot: FC = (props) => { graphonly_entities, error, entitiysearchonly_entities, + chatEntities, + nodeDetails: chatnodedetails }; simulateTypingEffect(finalbotReply); } catch (error) { @@ -324,9 +322,8 @@ const Chatbot: FC = (props) => { @@ -338,11 +335,10 @@ const Chatbot: FC = (props) => { } >
    {chat.message}
    @@ -366,15 +362,14 @@ const Chatbot: FC = (props) => { setModelModal(chat.model ?? ''); setSourcesModal(chat.sources ?? []); setResponseTime(chat.response_time ?? 0); - setChunkModal( - chat.mode === 'entity search+vector' ? chat.entities ?? [] : chat.chunk_ids ?? [] - ); setTokensUsed(chat.total_tokens ?? 0); setcypherQuery(chat.cypher_query ?? ''); setShowInfoModal(true); setChatsMode(chat.mode ?? ''); setgraphEntitites(chat.graphonly_entities ?? []); + setEntitiesModal(chat.entities ?? []); setmessageError(chat.error ?? ''); + setNodeDetailsModal(chat.nodeDetails ?? {}) }} > {' '} @@ -428,9 +423,8 @@ const Chatbot: FC = (props) => {
    = (props) => { disabled={loading || !connectionStatus} size='medium' > - {buttonCaptions.ask} {selectedRows != undefined && selectedRows.length > 0 && `(${selectedRows.length})`} + {buttonCaptions.ask} {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`}
    @@ -473,13 +467,14 @@ const Chatbot: FC = (props) => { diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx index 9b530fd0f..11869d3d4 100644 --- a/frontend/src/components/ChatBot/Communities.tsx +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -4,6 +4,7 @@ import ReactMarkdown from 'react-markdown'; import { CommunitiesProps } from '../../types'; const CommunitiesInfo: FC = ({ loading, communities }) => { + console.log('communities', communities); return ( <> {loading ? ( @@ -20,6 +21,10 @@ const CommunitiesInfo: FC = ({ loading, communities }) => { ID : {community.id}
    + + Score : + {community.score} + {community.summary}
  • diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 0995426c1..3f7564461 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -33,6 +33,7 @@ import DatabaseStatusIcon from './UI/DatabaseStatusIcon'; import RetryConfirmationDialog from './Popups/RetryConfirmation/Index'; import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; +import { useMessageContext } from '../context/UserMessages'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -79,7 +80,7 @@ const Content: React.FC = ({ alertType: 'neutral', alertMessage: '', }); - + const { setClearHistoryData } = useMessageContext(); const { filesData, setFilesData, @@ -94,6 +95,7 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, + setPostProcessingVal } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); @@ -160,7 +162,7 @@ const Content: React.FC = ({ (async () => { showNormalToast('Some Q&A functionality will only be available afterwards.'); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + showSuccessToast('All Q&A functionality is available now.'); })(); } }, [processedCount, userCredentials, queue]); @@ -510,9 +512,8 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ - userCredentials?.port ?? 
'7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -522,10 +523,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -540,6 +541,7 @@ const Content: React.FC = ({ setUserCredentials({ uri: '', password: '', userName: '', database: '' }); setSelectedNodes([]); setSelectedRels([]); + setClearHistoryData(true); }; const retryHandler = async (filename: string, retryoption: string) => { @@ -555,12 +557,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.NodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + NodesCount: isStartFromBegining ? 0 : f.NodesCount, + relationshipCount: isStartFromBegining ? 
0 : f.relationshipCount, + } : f; }); }); @@ -849,9 +851,8 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index b335a4324..738a1dd01 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -3,21 +3,23 @@ import React from 'react'; import { CheckboxSectionProps } from '../../types'; import { graphLabels } from '../../utils/Constants'; -const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds }) => ( +const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds, isDocChunk, isEntity }) => (
    - handleChange('DocumentChunk')} - /> - handleChange('Entities')} - /> + />)} + {isEntity && ( + handleChange('Entities')} + /> + )} {isgds && ( = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -136,10 +136,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -152,9 +152,11 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if (result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes.map((f: Node) => f); - const neoRels = result.data.data.relationships.map((f: Relationship) => f); + const neoNodes = result.data.data.nodes.map((f: Node) => f).filter((node: ExtendedNode) => node.labels.length === 1); + const nodeIds = new Set(neoNodes.map((node:any) => node.element_id)); + const neoRels = result.data.data.relationships.map((f: Relationship) => f).filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); const { finalNodes, finalRels, schemeVal } = processGraphData(neoNodes, neoRels); + if (mode === 'refreshMode') { initGraph(graphType, finalNodes, finalRels, schemeVal); } else { @@ -246,8 +248,8 @@ const GraphViewModal: React.FunctionComponent = ({ match && viewPoint === graphLabels.showGraphView 
? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -356,8 +358,8 @@ const GraphViewModal: React.FunctionComponent = ({ isActive && viewPoint === graphLabels.showGraphView ? 100 : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -421,7 +423,7 @@ const GraphViewModal: React.FunctionComponent = ({ graphType={graphType} loading={loading} handleChange={handleCheckboxChange} - isgds={allNodes.some((n) => n.labels.includes('__Community__'))} + {...getCheckboxConditions(allNodes)} /> )} diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 07d795484..48e94c023 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -27,7 +27,6 @@ export default function PageLayoutNew({ const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); const [showDrawerChatbot, setShowDrawerChatbot] = useState(true); - const [clearHistoryData, setClearHistoryData] = useState(false); const [showEnhancementDialog, toggleEnhancementDialog] = useReducer((s) => !s, false); const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); @@ -48,7 +47,7 @@ export default function PageLayoutNew({ } }; - const { messages } = useMessageContext(); + const { messages, setClearHistoryData, clearHistoryData } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); const deleteOnClick = async () => { diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index ee3ef510b..526150db4 100644 --- 
a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -203,7 +203,6 @@ const SideNav: React.FC = ({ {!isChatModalOpen && ( { setchatModeAnchor(e.currentTarget); setshowChatMode(true); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 28114ced7..b48b6c6a6 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -20,7 +20,7 @@ export default function PostProcessingCheckList() { {POST_PROCESSING_JOBS.map((job, idx) => { - const isCreateCommunities = job.title === 'create_communities'; + const isCreateCommunities = job.title === 'enable_communities'; return ( = ({ children }) = const [messages, setMessages] = useState([ { ...chatbotmessages.listMessages[1], datetime: getDateTime() }, ]); + const [clearHistoryData, setClearHistoryData] = useState(false); const value: MessageContextType = { messages, setMessages, + clearHistoryData, + setClearHistoryData }; return {children}; }; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index fbab1b719..7ff7e28c4 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -1,44 +1,9 @@ -import { createContext, useContext, useState, Dispatch, SetStateAction, FC, useEffect } from 'react'; -import { CustomFile, FileContextProviderProps, OptionType } from '../types'; +import { createContext, useContext, useState, FC, useEffect } from 'react'; +import { CustomFile, FileContextProviderProps, FileContextType, OptionType, showTextFromSchemaDialogType } from '../types'; import { chatModeLables, defaultLLM } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; -interface 
showTextFromSchemaDialogType { - triggeredFrom: string; - show: boolean; -} -interface FileContextType { - files: (File | null)[] | []; - filesData: CustomFile[] | []; - setFiles: Dispatch>; - setFilesData: Dispatch>; - model: string; - setModel: Dispatch>; - graphType: string; - setGraphType: Dispatch>; - selectedNodes: readonly OptionType[]; - setSelectedNodes: Dispatch>; - selectedRels: readonly OptionType[]; - setSelectedRels: Dispatch>; - rowSelection: Record; - setRowSelection: React.Dispatch>>; - selectedRows: string[]; - setSelectedRows: React.Dispatch>; - selectedSchemas: readonly OptionType[]; - setSelectedSchemas: Dispatch>; - chatMode: string; - setchatMode: Dispatch>; - isSchema: boolean; - setIsSchema: React.Dispatch>; - showTextFromSchemaDialog: showTextFromSchemaDialogType; - setShowTextFromSchemaDialog: React.Dispatch>; - postProcessingTasks: string[]; - setPostProcessingTasks: React.Dispatch>; - queue: Queue; - setQueue: Dispatch>; - processedCount: number; - setProcessedCount: Dispatch>; -} + const FileContext = createContext(undefined); const FileContextProvider: FC = ({ children }) => { @@ -68,9 +33,11 @@ const FileContextProvider: FC = ({ children }) => { 'materialize_text_chunk_similarities', 'enable_hybrid_search_and_fulltext_search_in_bloom', 'materialize_entity_similarities', - 'create_communities', + 'enable_communities', ]); const [processedCount, setProcessedCount] = useState(0); + const [postProcessingVal, setPostProcessingVal] = useState(false); + useEffect(() => { if (selectedNodeLabelstr != null) { const selectedNodeLabel = JSON.parse(selectedNodeLabelstr); @@ -117,6 +84,8 @@ const FileContextProvider: FC = ({ children }) => { setQueue, processedCount, setProcessedCount, + postProcessingVal, + setPostProcessingVal }; return {children}; }; diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index f69ddd3aa..39d9f8c2e 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ 
b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,20 +1,22 @@ -import { ChatInfo_APIResponse, UserCredentials } from '../types'; +import { ChatInfo_APIResponse, nodeDetailsProps, UserCredentials } from '../types'; import api from '../API/Index'; const chunkEntitiesAPI = async ( userCredentials: UserCredentials, - chunk_ids: string, database: string = 'neo4j', - is_entity: boolean = false + nodeDetails: (nodeDetailsProps), + entities:(string)[], + mode: string, ) => { try { const formData = new FormData(); formData.append('uri', userCredentials?.uri ?? ''); formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); - formData.append('chunk_ids', chunk_ids); formData.append('database', database); - formData.append('is_entity', String(is_entity)); + formData.append('nodedetails', JSON.stringify(nodeDetails)); + formData.append('entities', JSON.stringify(entities)); + formData.append('mode', mode); const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 10f16e4d6..b280cc5d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -5,6 +5,7 @@ import { OverridableStringUnion } from '@mui/types'; import type { Node, Relationship } from '@neo4j-nvl/base'; import { NonOAuthError } from '@react-oauth/google'; import { BannerType } from '@neo4j-ndl/react'; +import Queue from './utils/Queue'; export interface CustomFileBase extends Partial { processing: number | string; @@ -201,7 +202,7 @@ export interface Source { source_name: string; start_time?: string; } -export interface chunk { +export interface ChunkDetail { id: string; score: number; } @@ -215,7 +216,8 @@ export interface Messages { model?: string; isLoading?: boolean; response_time?: number; - chunk_ids?: chunk[]; + nodeDetails?: nodeDetailsProps; + chunk_ids?: string[]; total_tokens?: number; speaking?: boolean; copying?: boolean; @@ 
-223,7 +225,7 @@ export interface Messages { cypher_query?: string; graphonly_entities?: []; error?: string; - entities?: chunk[]; + entities?: string[]; } export type ChatbotProps = { @@ -234,7 +236,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes extends Omit {} +export interface WikipediaModalTypes extends Omit { } export interface GraphViewModalProps { open: boolean; @@ -257,6 +259,8 @@ export interface CheckboxSectionProps { loading: boolean; handleChange: (graph: GraphType) => void; isgds: boolean; + isDocChunk: boolean; + isEntity: boolean; } export interface fileName { @@ -414,12 +418,13 @@ export interface chatInfoMessage extends Partial { sources: string[]; model: string; response_time: number; - chunk_ids: chunk[]; total_tokens: number; mode: string; cypher_query?: string; graphonly_entities: []; error: string; + entities_ids: string[]; + nodeDetails: nodeDetailsProps; } export interface eventResponsetypes extends Omit { @@ -471,6 +476,7 @@ export type Community = { weight: number; level: number; community_rank: number; + score?: number; }; export type GroupedEntity = { texts: Set; @@ -639,6 +645,8 @@ export interface ContextProps { export interface MessageContextType { messages: Messages[] | []; setMessages: Dispatch>; + clearHistoryData: boolean; + setClearHistoryData: Dispatch> } export interface DatabaseStatusProps { @@ -670,3 +678,63 @@ export type CommunitiesProps = { loading: boolean; communities: Community[]; }; + +export interface entity { + id: string; + score: number; +}; + +export interface community { + id: string; + score: number; +} + +export interface nodeDetailsProps { + chunkdetails?: ChunkDetail[], + entitydetails?: entity[], + communitydetails?: community[] +} + +export type entityProps = { + entityids: [], + relationshipids: [] +} + +export interface showTextFromSchemaDialogType { + triggeredFrom: string; + show: boolean; +} +export interface FileContextType 
{ + files: (File | null)[] | []; + filesData: CustomFile[] | []; + setFiles: Dispatch>; + setFilesData: Dispatch>; + model: string; + setModel: Dispatch>; + graphType: string; + setGraphType: Dispatch>; + selectedNodes: readonly OptionType[]; + setSelectedNodes: Dispatch>; + selectedRels: readonly OptionType[]; + setSelectedRels: Dispatch>; + rowSelection: Record; + setRowSelection: React.Dispatch>>; + selectedRows: string[]; + setSelectedRows: React.Dispatch>; + selectedSchemas: readonly OptionType[]; + setSelectedSchemas: Dispatch>; + chatMode: string; + setchatMode: Dispatch>; + isSchema: boolean; + setIsSchema: React.Dispatch>; + showTextFromSchemaDialog: showTextFromSchemaDialogType; + setShowTextFromSchemaDialog: React.Dispatch>; + postProcessingTasks: string[]; + setPostProcessingTasks: React.Dispatch>; + queue: Queue; + setQueue: Dispatch>; + processedCount: number; + setProcessedCount: Dispatch>; + postProcessingVal: boolean; + setPostProcessingVal: Dispatch>; +} \ No newline at end of file diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 08948ba87..b55c43817 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -75,6 +75,7 @@ export const chatModeLables = { entity_vector: 'entity search+vector', unavailableChatMode: 'Chat mode is unavailable when rows are selected', selected: 'Selected', + global_vector: 'global search+vector+fulltext' }; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' @@ -107,6 +108,10 @@ export const chatModes = mode: chatModeLables.entity_vector, description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', }, + { + mode : chatModeLables.global_vector, + description: 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.' + } ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? 
parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; @@ -234,8 +239,8 @@ export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ performing similarity-based searches.`, }, { - title: 'create_communities', - description: 'Create Communities identifies and groups similar entities, improving search accuracy and analysis.', + title: 'enable_communities', + description: 'Enable community creation across entities to use GraphRAG capabilities both local and global search.', }, ]; export const RETRY_OPIONS = [ diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 97e7bda07..18fb69105 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -178,6 +178,7 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela }; }); const finalNodes = newNodes.flat(); + // Process relationships const newRels: Relationship[] = neoRels.map((relations: any) => { return { id: relations.element_id, @@ -213,6 +214,7 @@ export const filterData = ( (type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__' ); // Only Document + Chunk + // const processedEntities = entityTypes.flatMap(item => item.includes(',') ? 
item.split(',') : item); if ( graphType.includes('DocumentChunk') && !graphType.includes('Entities') && @@ -245,6 +247,7 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; + console.log('labels', entityNodes); // Only Communities } else if ( graphType.includes('Communities') && @@ -336,6 +339,7 @@ export const filterData = ( filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; + console.log('entity', filteredScheme); } return { filteredNodes, filteredRelations, filteredScheme }; }; @@ -428,6 +432,8 @@ export const getDescriptionForChatMode = (mode: string): string => { return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.'; case chatModeLables.entity_vector: return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.'; + case chatModeLables.global_vector: + return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.' 
default: return 'Chat mode description not available'; // Fallback description } @@ -469,3 +475,10 @@ export function isAllowedHost(url: string, allowedHosts: string[]) { return false; } } + +export const getCheckboxConditions = (allNodes: ExtendedNode[]) => { + const isDocChunk = allNodes.some((n) => n.labels?.includes('Document')); + const isEntity = allNodes.some((n) => !n.labels?.includes('Document') || !n.labels?.includes('Chunk')); + const isgds = allNodes.some((n) => n.labels?.includes('__Community__')); + return { isDocChunk, isEntity, isgds }; +}; From 1180406a7a2f0eb7605a34ecfad37f2f584f9af9 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:01:48 +0000 Subject: [PATCH 136/292] Added elapsed time for extarction on each breakdown function --- backend/src/main.py | 86 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index 41942d500..ce8c9812a 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -292,14 +292,31 @@ def processing_source(uri, userName, password, database, model, file_name, pages Json response to API with fileName, nodeCount, relationshipCount, processingTime, status and model as attributes. 
""" + uri_latency = {} start_time = datetime.now() processing_source_start_time = time.time() + start_create_connection = time.time() graph = create_graph_database_connection(uri, userName, password, database) + end_create_connection = time.time() + elapsed_create_connection = end_create_connection - start_create_connection + logging.info(f'Time taken database connection: {elapsed_create_connection:.2f} seconds') + uri_latency["create_connection"] = f'{elapsed_create_connection:.2f}' graphDb_data_Access = graphDBdataAccess(graph) + start_get_chunkId_chunkDoc_list = time.time() total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) + end_get_chunkId_chunkDoc_list = time.time() + elapsed_get_chunkId_chunkDoc_list = end_get_chunkId_chunkDoc_list - start_get_chunkId_chunkDoc_list + logging.info(f'Time taken to create list chunkids with chunk document: {elapsed_get_chunkId_chunkDoc_list:.2f} seconds') + uri_latency["create_list_chunk_and_document"] = f'{elapsed_get_chunkId_chunkDoc_list:.2f}' + + start_status_document_node = time.time() result = graphDb_data_Access.get_current_status_document_node(file_name) - + end_status_document_node = time.time() + elapsed_status_document_node = end_status_document_node - start_status_document_node + logging.info(f'Time taken to get the current status of document node: {elapsed_status_document_node:.2f} seconds') + uri_latency["get_status_document_node"] = f'{elapsed_status_document_node:.2f}' + select_chunks_with_retry=0 node_count = 0 rel_count = 0 @@ -319,8 +336,14 @@ def processing_source(uri, userName, password, database, model, file_name, pages obj_source_node.processed_chunk = 0+select_chunks_with_retry logging.info(file_name) logging.info(obj_source_node) - graphDb_data_Access.update_source_node(obj_source_node) + start_update_source_node = time.time() + graphDb_data_Access.update_source_node(obj_source_node) + end_update_source_node = time.time() + 
elapsed_update_source_node = end_update_source_node - start_update_source_node + logging.info(f'Time taken to update the document source node: {elapsed_update_source_node:.2f} seconds') + uri_latency["update_source_node"] = f'{elapsed_update_source_node:.2f}' + logging.info('Update the status as Processing') update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED')) # selected_chunks = [] @@ -342,9 +365,12 @@ def processing_source(uri, userName, password, database, model, file_name, pages break else: processing_chunks_start_time = time.time() - node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) - processing_chunks_end_time = time.time() - processing_chunks_start_time - logging.info(f"{update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_end_time:.2f} seconds for file name {file_name}") + node_count,rel_count,latency_processed_chunk = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) + processing_chunks_end_time = time.time() + processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time + logging.info(f"Time taken {update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_elapsed_end_time:.2f} seconds for file name {file_name}") + uri_latency[f'processed_combine_chunk_{i}'] = f'{processing_chunks_elapsed_end_time:.2f}' + uri_latency[f'processed_chunk_detail_{i}'] = latency_processed_chunk end_time = datetime.now() processed_time = end_time - start_time @@ -390,16 +416,17 @@ def processing_source(uri, userName, password, database, model, file_name, pages else: delete_uploaded_local_file(merged_file_path, file_name) processing_source_func = time.time() - processing_source_start_time - 
logging.info(f"processing source function completed in {processing_source_func:.2f} seconds for file name {file_name}") - return { - "fileName": file_name, - "nodeCount": node_count, - "relationshipCount": rel_count, - "processingTime": round(processed_time.total_seconds(),2), - "status" : job_status, - "model" : model, - "success_count" : 1 - } + logging.info(f"Time taken to processing source function completed in {processing_source_func:.2f} seconds for file name {file_name}") + uri_latency["Processed_source"] = f'{processing_source_func:.2f}' + uri_latency["fileName"] = file_name + uri_latency["nodeCount"] = node_count + uri_latency["relationshipCount"] = rel_count + uri_latency["total_processing_time"] = round(processed_time.total_seconds(),2) + uri_latency["status"] = job_status + uri_latency["model"] = model + uri_latency["success_count"] = 1 + + return uri_latency else: logging.info('File does not process because it\'s already in Processing status') else: @@ -414,15 +441,38 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab graph = create_graph_database_connection(uri, userName, password, database) else: graph = create_graph_database_connection(uri, userName, password, database) - + + start_update_embedding = time.time() update_embedding_create_vector_index( graph, chunkId_chunkDoc_list, file_name) + end_update_embedding = time.time() + elapsed_update_embedding = end_update_embedding - start_update_embedding + logging.info(f'Time taken to update embedding in chunk node: {elapsed_update_embedding:.2f} seconds') + latency_processing_chunk = {"update_embedding" : f'{elapsed_update_embedding:.2f}'} logging.info("Get graph document list from models") + + start_entity_extraction = time.time() graph_documents = get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship) + end_entity_extraction = time.time() + elapsed_entity_extraction = end_entity_extraction - start_entity_extraction + logging.info(f'Time 
taken to extract enitities from LLM Graph Builder: {elapsed_entity_extraction:.2f} seconds') + latency_processing_chunk["entity_extraction"] = f'{elapsed_entity_extraction:.2f}' cleaned_graph_documents = handle_backticks_nodes_relationship_id_type(graph_documents) + + start_save_graphDocuments = time.time() save_graphDocuments_in_neo4j(graph, cleaned_graph_documents) + end_save_graphDocuments = time.time() + elapsed_save_graphDocuments = end_save_graphDocuments - start_save_graphDocuments + logging.info(f'Time taken to save graph document in neo4j: {elapsed_save_graphDocuments:.2f} seconds') + latency_processing_chunk["save_graphDocuments"] = f'{elapsed_save_graphDocuments:.2f}' + chunks_and_graphDocuments_list = get_chunk_and_graphDocument(cleaned_graph_documents, chunkId_chunkDoc_list) + + start_relationship = time.time() merge_relationship_between_chunk_and_entites(graph, chunks_and_graphDocuments_list) - # return graph_documents + end_relationship = time.time() + elapsed_relationship = end_relationship - start_relationship + logging.info(f'Time taken to create relationship between chunk and entities: {elapsed_relationship:.2f} seconds') + latency_processing_chunk["relationship_between_chunk_entity"] = f'{elapsed_relationship:.2f}' distinct_nodes = set() relations = [] @@ -441,7 +491,7 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab rel_count += len(relations) print(f'node count internal func:{node_count}') print(f'relation count internal func:{rel_count}') - return node_count,rel_count + return node_count,rel_count,latency_processing_chunk def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): if retry_condition is None: From 9206fc89a773e862fe8d5b92f52c6c87e2a91968 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:15:24 +0000 Subject: [PATCH 137/292] lint and format fixes --- .../src/components/ChatBot/ChatInfoModal.tsx | 55 
++++++++----------- .../src/components/ChatBot/ChatModeToggle.tsx | 21 +++---- frontend/src/components/ChatBot/Chatbot.tsx | 32 ++++++----- frontend/src/components/Content.tsx | 33 +++++------ .../components/Graph/CheckboxSelection.tsx | 23 +++++--- .../src/components/Graph/GraphViewModal.tsx | 34 +++++++----- .../EntityExtractionSetting.tsx | 8 +++ frontend/src/context/UserMessages.tsx | 2 +- frontend/src/context/UsersFiles.tsx | 10 +++- frontend/src/services/ChunkEntitiesInfo.ts | 6 +- frontend/src/types.ts | 20 +++---- frontend/src/utils/Constants.ts | 9 +-- frontend/src/utils/Utils.ts | 2 +- 13 files changed, 139 insertions(+), 116 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 568868000..92ac3fd9a 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -43,18 +43,12 @@ const ChatInfoModal: React.FC = ({ cypher_query, graphonly_entities, error, - entities_ids + entities_ids, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const [activeTab, setActiveTab] = useState( - error?.length - ? 10 - : mode === chatModeLables.global_vector - ? 7 - : mode === chatModeLables.graph - ? 4 - : 3 + error?.length ? 10 : mode === chatModeLables.global_vector ? 7 : mode === chatModeLables.graph ? 
4 : 3 ); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); @@ -95,8 +89,12 @@ const ChatInfoModal: React.FC = ({ (async () => { setLoading(true); try { - const response = await chunkEntitiesAPI(userCredentials as UserCredentials, userCredentials?.database, nodeDetails, entities_ids, - mode, + const response = await chunkEntitiesAPI( + userCredentials as UserCredentials, + userCredentials?.database, + nodeDetails, + entities_ids, + mode ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -129,26 +127,25 @@ const ChatInfoModal: React.FC = ({ }) ); setRelationships(relationshipsData ?? []); - setCommunities(communitiesData.map((community: any) => { - const communityScore = nodeDetails?.communitydetails?.find((c: any) => - c.id === community.element_id); - return { - ...community, - score: communityScore?.score || 1 - }; - }) - .sort((a: any, b: any) => b.score - a.score) + setCommunities( + communitiesData + .map((community: any) => { + const communityScore = nodeDetails?.communitydetails?.find((c: any) => c.id === community.element_id); + return { + ...community, + score: communityScore?.score || 1, + }; + }) + .sort((a: any, b: any) => b.score - a.score) ); setChunks( chunksData .map((chunk: any) => { - const chunkScore = nodeDetails?.chunkdetails?.find((c: any) => - c.id - === chunk.id); + const chunkScore = nodeDetails?.chunkdetails?.find((c: any) => c.id === chunk.id); return { ...chunk, - score: chunkScore?.score + score: chunkScore?.score, }; }) .sort((a: any, b: any) => b.score - a.score) @@ -199,9 +196,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? 
( + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> @@ -211,11 +208,7 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === chatModeLables.entity_vector ? ( - Communities - ) : ( - <> - )} + {mode === chatModeLables.entity_vector ? Communities : <>} )} diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index b3d493236..df44985be 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => { }, + closeHandler = () => {}, open, anchorPortal = true, disableBackdrop = false, @@ -35,20 +35,17 @@ export default function ChatModeToggle({ return isGdsActive && isCommunityAllowed ? chatModes : chatModes?.filter( - (m) => - !m.mode.includes(chatModeLables.entity_vector) && - !m.mode.includes(chatModeLables.global_vector) - ); + (m) => !m.mode.includes(chatModeLables.entity_vector) && !m.mode.includes(chatModeLables.global_vector) + ); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean( - selectedRows.length && - !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) ); const handleModeChange = () => { if (isDisabled) { - setchatMode(chatModeLables.graph_vector); + setchatMode(chatModeLables.graph_vector); } else { setchatMode(m.mode); } @@ -57,11 +54,11 @@ export default function ChatModeToggle({ return { title: (
    - + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)}
    - {m.description} + {m.description}
    ), @@ -71,12 +68,12 @@ export default function ChatModeToggle({ {chatMode === m.mode && ( <> - {chatModeLables.selected} + {chatModeLables.selected} )} {isDisabled && ( <> - {chatModeLables.unavailableChatMode} + {chatModeLables.unavailableChatMode} )} diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 82e6dfed0..550d04c97 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -58,8 +58,8 @@ const Chatbot: FC = (props) => { }, }); - let selectedFileNames: CustomFile[] = filesData.filter(f => - selectedRows.includes(f.id) && ['Completed'].includes(f.status) + let selectedFileNames: CustomFile[] = filesData.filter( + (f) => selectedRows.includes(f.id) && ['Completed'].includes(f.status) ); const handleInputChange = (e: React.ChangeEvent) => { @@ -117,7 +117,7 @@ const Chatbot: FC = (props) => { graphonly_entities: response?.graphonly_entities, error: response.error, entitiysearchonly_entities: response.entitiysearchonly_entities, - nodeDetails: response?.nodeDetails + nodeDetails: response?.nodeDetails, }, ]); } else { @@ -217,7 +217,7 @@ const Chatbot: FC = (props) => { error, entitiysearchonly_entities, chatEntities, - nodeDetails: chatnodedetails + nodeDetails: chatnodedetails, }; simulateTypingEffect(finalbotReply); } catch (error) { @@ -322,8 +322,9 @@ const Chatbot: FC = (props) => { @@ -335,10 +336,11 @@ const Chatbot: FC = (props) => { } >
    {chat.message}
    @@ -369,7 +371,7 @@ const Chatbot: FC = (props) => { setgraphEntitites(chat.graphonly_entities ?? []); setEntitiesModal(chat.entities ?? []); setmessageError(chat.error ?? ''); - setNodeDetailsModal(chat.nodeDetails ?? {}) + setNodeDetailsModal(chat.nodeDetails ?? {}); }} > {' '} @@ -423,8 +425,9 @@ const Chatbot: FC = (props) => {
    = (props) => { disabled={loading || !connectionStatus} size='medium' > - {buttonCaptions.ask} {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`} + {buttonCaptions.ask}{' '} + {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`}
    diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 3f7564461..d6f9c709d 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -95,7 +95,6 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, - setPostProcessingVal } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); @@ -162,7 +161,7 @@ const Content: React.FC = ({ (async () => { showNormalToast('Some Q&A functionality will only be available afterwards.'); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + showSuccessToast('All Q&A functionality is available now.'); })(); } }, [processedCount, userCredentials, queue]); @@ -512,8 +511,9 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ + userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -523,10 +523,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 
'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -557,12 +557,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.NodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + NodesCount: isStartFromBegining ? 0 : f.NodesCount, + relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, + } : f; }); }); @@ -851,8 +851,9 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index 738a1dd01..d0a3429f2 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -3,15 +3,24 @@ import React from 'react'; import { CheckboxSectionProps } from '../../types'; import { graphLabels } from '../../utils/Constants'; -const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds, isDocChunk, isEntity }) => ( +const CheckboxSelection: React.FC = ({ + graphType, + loading, + handleChange, + isgds, + isDocChunk, + isEntity, +}) => (
    - {isDocChunk && ( handleChange('DocumentChunk')} - />)} + {isDocChunk && ( + handleChange('DocumentChunk')} + /> + )} {isEntity && ( = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -136,10 +136,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -152,9 +152,13 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if (result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes.map((f: Node) => f).filter((node: ExtendedNode) => node.labels.length === 1); - const nodeIds = new Set(neoNodes.map((node:any) => node.element_id)); - const neoRels = result.data.data.relationships.map((f: Relationship) => f).filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); + const neoNodes = result.data.data.nodes + .map((f: Node) => f) + .filter((node: ExtendedNode) => node.labels.length === 1); + const nodeIds = new Set(neoNodes.map((node: any) => node.element_id)); + const neoRels = result.data.data.relationships + .map((f: Relationship) => f) + .filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); const { finalNodes, finalRels, schemeVal } = 
processGraphData(neoNodes, neoRels); if (mode === 'refreshMode') { @@ -248,8 +252,8 @@ const GraphViewModal: React.FunctionComponent = ({ match && viewPoint === graphLabels.showGraphView ? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -358,8 +362,8 @@ const GraphViewModal: React.FunctionComponent = ({ isActive && viewPoint === graphLabels.showGraphView ? 100 : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 5bbd4607a..8a7362d26 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -217,6 +217,14 @@ export default function EntityExtractionSetting({ setSelectedRels(relationshipTypeOptions); setIsSchema(true); localStorage.setItem('isSchema', JSON.stringify(true)); + localStorage.setItem( + 'selectedNodeLabels', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: nodeLabelOptions }) + ); + localStorage.setItem( + 'selectedRelationshipLabels', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: relationshipTypeOptions }) + ); }, [nodeLabelOptions, relationshipTypeOptions]); const handleClear = () => { diff --git a/frontend/src/context/UserMessages.tsx b/frontend/src/context/UserMessages.tsx index 1bbed905d..0e7b7abde 100644 --- a/frontend/src/context/UserMessages.tsx +++ b/frontend/src/context/UserMessages.tsx @@ -15,7 +15,7 @@ const MessageContextWrapper: FC = ({ children }) = messages, setMessages, clearHistoryData, - 
setClearHistoryData + setClearHistoryData, }; return {children}; }; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 7ff7e28c4..04a435f07 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -1,5 +1,11 @@ import { createContext, useContext, useState, FC, useEffect } from 'react'; -import { CustomFile, FileContextProviderProps, FileContextType, OptionType, showTextFromSchemaDialogType } from '../types'; +import { + CustomFile, + FileContextProviderProps, + FileContextType, + OptionType, + showTextFromSchemaDialogType, +} from '../types'; import { chatModeLables, defaultLLM } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; @@ -85,7 +91,7 @@ const FileContextProvider: FC = ({ children }) => { processedCount, setProcessedCount, postProcessingVal, - setPostProcessingVal + setPostProcessingVal, }; return {children}; }; diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index 39d9f8c2e..dbbbcbb5b 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ b/frontend/src/services/ChunkEntitiesInfo.ts @@ -4,9 +4,9 @@ import api from '../API/Index'; const chunkEntitiesAPI = async ( userCredentials: UserCredentials, database: string = 'neo4j', - nodeDetails: (nodeDetailsProps), - entities:(string)[], - mode: string, + nodeDetails: nodeDetailsProps, + entities: string[], + mode: string ) => { try { const formData = new FormData(); diff --git a/frontend/src/types.ts b/frontend/src/types.ts index b280cc5d6..f407ca350 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -236,7 +236,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes extends Omit { } +export interface WikipediaModalTypes extends Omit {} export interface GraphViewModalProps { open: boolean; @@ -646,7 +646,7 @@ export interface 
MessageContextType { messages: Messages[] | []; setMessages: Dispatch>; clearHistoryData: boolean; - setClearHistoryData: Dispatch> + setClearHistoryData: Dispatch>; } export interface DatabaseStatusProps { @@ -682,7 +682,7 @@ export type CommunitiesProps = { export interface entity { id: string; score: number; -}; +} export interface community { id: string; @@ -690,15 +690,15 @@ export interface community { } export interface nodeDetailsProps { - chunkdetails?: ChunkDetail[], - entitydetails?: entity[], - communitydetails?: community[] + chunkdetails?: ChunkDetail[]; + entitydetails?: entity[]; + communitydetails?: community[]; } export type entityProps = { - entityids: [], - relationshipids: [] -} + entityids: []; + relationshipids: []; +}; export interface showTextFromSchemaDialogType { triggeredFrom: string; @@ -737,4 +737,4 @@ export interface FileContextType { setProcessedCount: Dispatch>; postProcessingVal: boolean; setPostProcessingVal: Dispatch>; -} \ No newline at end of file +} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index b55c43817..8a0fa5ff3 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -75,7 +75,7 @@ export const chatModeLables = { entity_vector: 'entity search+vector', unavailableChatMode: 'Chat mode is unavailable when rows are selected', selected: 'Selected', - global_vector: 'global search+vector+fulltext' + global_vector: 'global search+vector+fulltext', }; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' @@ -109,9 +109,10 @@ export const chatModes = description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', }, { - mode : chatModeLables.global_vector, - description: 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.' 
- } + mode: chatModeLables.global_vector, + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 18fb69105..1970ee754 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -433,7 +433,7 @@ export const getDescriptionForChatMode = (mode: string): string => { case chatModeLables.entity_vector: return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.'; case chatModeLables.global_vector: - return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.' + return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.'; default: return 'Chat mode description not available'; // Fallback description } From 04058166a07bbf057e0f774c9b9222ce928a9749 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:28:00 +0000 Subject: [PATCH 138/292] removed dev logs --- frontend/src/components/ChatBot/Communities.tsx | 1 - frontend/src/components/Content.tsx | 7 +++---- .../Popups/ConnectionModal/ConnectionModal.tsx | 1 - .../components/Popups/GraphEnhancementDialog/index.tsx | 3 +-- frontend/src/utils/Constants.ts | 10 ++-------- frontend/src/utils/Utils.ts | 2 -- 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx index 11869d3d4..e2092c101 100644 --- a/frontend/src/components/ChatBot/Communities.tsx +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -4,7 +4,6 @@ import ReactMarkdown from 'react-markdown'; import { CommunitiesProps 
} from '../../types'; const CommunitiesInfo: FC = ({ loading, communities }) => { - console.log('communities', communities); return ( <> {loading ? ( diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index d6f9c709d..9c32294fd 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -185,7 +185,6 @@ const Content: React.FC = ({ if (connection != null) { (async () => { const parsedData = JSON.parse(connection); - console.log(parsedData.uri); const response = await connectAPI( parsedData.uri, parsedData.user, @@ -473,12 +472,12 @@ const Content: React.FC = ({ } data = triggerBatchProcessing(filesTobeSchedule, filesTobeProcessed, true, true); } - Promise.allSettled(data).then(async (_) => { + Promise.allSettled(data).then((_) => { setextractLoading(false); }); } else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) { data = scheduleBatchWiseProcess(queue.items, true); - Promise.allSettled(data).then(async (_) => { + Promise.allSettled(data).then((_) => { setextractLoading(false); }); } else { @@ -497,7 +496,7 @@ const Content: React.FC = ({ } else { data = triggerBatchProcessing(queue.items, queue.items as CustomFile[], true, false); } - Promise.allSettled(data).then(async (_) => { + Promise.allSettled(data).then((_) => { setextractLoading(false); }); } else { diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 618cc7b80..ad7e7b48e 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -210,7 +210,6 @@ export default function ConnectionModal({ const isgdsActive = response.data.data.gds_status; const isReadOnlyUser = !response.data.data.write_access; setGdsActive(isgdsActive); - console.log({ isReadOnlyUser }); setIsReadOnlyUser(isReadOnlyUser); localStorage.setItem( 
'neo4j.connection', diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index 9e409d8e2..c4d9ee2f7 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -29,9 +29,8 @@ export default function GraphEnhancementDialog({ const orphanNodesDeleteHandler = async (selectedEntities: string[]) => { try { setorphanDeleteAPIloading(true); - const response = await deleteOrphanAPI(userCredentials as UserCredentials, selectedEntities); + await deleteOrphanAPI(userCredentials as UserCredentials, selectedEntities); setorphanDeleteAPIloading(false); - console.log(response); } catch (error) { setorphanDeleteAPIloading(false); console.log(error); diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8a0fa5ff3..daf3ef0f2 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -43,7 +43,6 @@ export const llms = 'openai-gpt-3.5', 'openai-gpt-4o', 'openai-gpt-4o-mini', - 'gemini-1.0-pro', 'gemini-1.5-pro', 'gemini-1.5-flash', 'azure_ai_gpt_35', @@ -57,15 +56,10 @@ export const llms = export const defaultLLM = llms?.includes('openai-gpt-4o') ? 'openai-gpt-4o' - : llms?.includes('gemini-1.0-pro') - ? 'gemini-1.0-pro' + : llms?.includes('gemini-1.5-pro') + ? 'gemini-1.5-pro' : 'diffbot'; -// export const chatModes = -// process.env?.VITE_CHAT_MODES?.trim() != '' -// ? 
process.env.VITE_CHAT_MODES?.split(',') -// : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext', 'local community', 'global community']; - export const chatModeLables = { vector: 'vector', graph: 'graph', diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 1970ee754..1d8f3be07 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -247,7 +247,6 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; - console.log('labels', entityNodes); // Only Communities } else if ( graphType.includes('Communities') && @@ -339,7 +338,6 @@ export const filterData = ( filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; - console.log('entity', filteredScheme); } return { filteredNodes, filteredRelations, filteredScheme }; }; From c9844b70c1590f32927ebbf5ffa08cd6c3462a3a Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:08:37 +0000 Subject: [PATCH 139/292] communities fix --- .../src/components/ChatBot/ChatInfoModal.tsx | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 92ac3fd9a..84cb2ce88 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -128,12 +128,14 @@ const ChatInfoModal: React.FC = ({ ); setRelationships(relationshipsData ?? 
[]); setCommunities( - communitiesData - .map((community: any) => { - const communityScore = nodeDetails?.communitydetails?.find((c: any) => c.id === community.element_id); + (communitiesData || []) + .map((community: { element_id: string }) => { + const communityScore = nodeDetails?.communitydetails?.find((c: { id: string }) => + c.id + === community.element_id); return { ...community, - score: communityScore?.score || 1, + score: communityScore?.score ?? 1, }; }) .sort((a: any, b: any) => b.score - a.score) @@ -196,9 +198,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> From 085a0e40b82bacac337a746c233ebbc4a8189c21 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:44:01 +0000 Subject: [PATCH 140/292] disabled the generate graph for read only user --- frontend/src/components/Content.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 9c32294fd..5aa6ce98e 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -870,7 +870,7 @@ const Content: React.FC = ({ placement='top' label='generate graph' onClick={onClickHandler} - disabled={disableCheck} + disabled={disableCheck || isReadOnlyUser} className='mr-0.5' size={isTablet ? 
'small' : 'medium'} > From 78137505f51baa8de37a349379db1b1a547b9dc2 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:47:11 +0000 Subject: [PATCH 141/292] format fixes --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 12 ++++++------ frontend/src/components/FileTable.tsx | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 84cb2ce88..0d56978f7 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -130,9 +130,9 @@ const ChatInfoModal: React.FC = ({ setCommunities( (communitiesData || []) .map((community: { element_id: string }) => { - const communityScore = nodeDetails?.communitydetails?.find((c: { id: string }) => - c.id - === community.element_id); + const communityScore = nodeDetails?.communitydetails?.find( + (c: { id: string }) => c.id === community.element_id + ); return { ...community, score: communityScore?.score ?? 1, @@ -198,9 +198,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? 
( Top Entities used ) : ( <> diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 2d0d690aa..01ab3a0aa 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -52,7 +52,7 @@ const FileTable = forwardRef((props, ref) => { const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); - const { userCredentials } = useCredentials(); + const { userCredentials, isReadOnlyUser } = useCredentials(); const columnHelper = createColumnHelper(); const [columnFilters, setColumnFilters] = useState([]); const [isLoading, setIsLoading] = useState(false); @@ -148,7 +148,7 @@ const FileTable = forwardRef((props, ref) => { {info.getValue()} {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || - info.getValue() === 'Cancelled') && ( + (info.getValue() === 'Cancelled' && !isReadOnlyUser)) && ( Date: Fri, 27 Sep 2024 12:54:10 +0000 Subject: [PATCH 142/292] graph labels change --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 84cb2ce88..cde0bbdea 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -32,6 +32,7 @@ import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './Communities'; import { chatModeLables } from '../../utils/Constants'; +import { Relationship } from '@neo4j-nvl/base'; const ChatInfoModal: React.FC = ({ sources, @@ -99,8 +100,11 @@ const ChatInfoModal: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } - const nodesData = response?.data?.data?.nodes; - const 
relationshipsData = response?.data?.data?.relationships; + const nodesData = response?.data?.data?.nodes.map((f: Node) => f) + .filter((node: ExtendedNode) => node.labels.length === 1); + const nodeIds = new Set(nodesData.map((node: any) => node.element_id)); + const relationshipsData = response?.data?.data?.relationships.map((f: Relationship) => f) + .filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id));; const communitiesData = response?.data?.data?.community_data; const chunksData = response?.data?.data?.chunk_data; From e0e97fbfe213e5079a71633d9c2ea29439de4acf Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:03:35 +0000 Subject: [PATCH 143/292] added the readonly check for already added waiting files --- frontend/src/components/Content.tsx | 4 ++-- frontend/src/components/FileTable.tsx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 5aa6ce98e..53bf47058 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -154,7 +154,7 @@ const Content: React.FC = ({ if (afterFirstRender) { localStorage.setItem('processedCount', JSON.stringify({ db: userCredentials?.uri, count: processedCount })); } - if (processedCount == batchSize) { + if (processedCount == batchSize && !isReadOnlyUser) { handleGenerateGraph([], true); } if (processedCount === 1 && queue.isEmpty()) { @@ -164,7 +164,7 @@ const Content: React.FC = ({ showSuccessToast('All Q&A functionality is available now.'); })(); } - }, [processedCount, userCredentials, queue]); + }, [processedCount, userCredentials, queue, isReadOnlyUser]); useEffect(() => { if (afterFirstRender) { diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 01ab3a0aa..8855a746e 100644 --- a/frontend/src/components/FileTable.tsx +++ 
b/frontend/src/components/FileTable.tsx @@ -701,7 +701,7 @@ const FileTable = forwardRef((props, ref) => { }, [connectionStatus, userCredentials]); useEffect(() => { - if (connectionStatus && filesData.length && onlyfortheFirstRender) { + if (connectionStatus && filesData.length && onlyfortheFirstRender && !isReadOnlyUser) { const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length; if (processingFilesCount) { if (processingFilesCount === 1) { @@ -718,7 +718,7 @@ const FileTable = forwardRef((props, ref) => { } onlyfortheFirstRender = false; } - }, [connectionStatus, filesData.length]); + }, [connectionStatus, filesData.length, isReadOnlyUser]); const cancelHandler = async (fileName: string, id: string, fileSource: string) => { setFilesData((prevfiles) => From 42f4c82b6ae55cddfccf73633e3aa421a5e10939 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Fri, 27 Sep 2024 13:20:45 +0000 Subject: [PATCH 144/292] Retriever evaluation using RAGAS --- experiments/RAGAS_evaluation.csv | 103 + experiments/modes_comparison_RAGAS.ipynb | 3442 ++++++++++++++++++++++ 2 files changed, 3545 insertions(+) create mode 100644 experiments/RAGAS_evaluation.csv create mode 100644 experiments/modes_comparison_RAGAS.ipynb diff --git a/experiments/RAGAS_evaluation.csv b/experiments/RAGAS_evaluation.csv new file mode 100644 index 000000000..3e1347135 --- /dev/null +++ b/experiments/RAGAS_evaluation.csv @@ -0,0 +1,103 @@ +question,mode,answer,contexts,ground_truth,question_complexity,context_precision,context_recall,faithfulness,answer_relevancy,answer_correctness +What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?,vector,"Amazon's Elastic Block Storage (EBS) service faced downtime in 2012 due to an electric outage at Amazon's U.S. East data center in Virginia. 
The outage was significant as it impacted the availability of services, causing inconvenience and discomfort to customers who regularly used the website. This highlighted the critical need for major upgrades to the U.S. East data center to prevent future service disruptions and maintain customer satisfaction.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. 
In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. 
Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. 
Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service faced several downtimes due to electric outages. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center and has been causing outage issues. These server crashes brought inconvenience and discomfort to the customers who use the website regularly. The company needed to do major upgrades on the U.S. East data center to prevent the services from going down in the future.",simple,0.9999999999,1.0,0.75,0.924069484158001,0.7698959011659919 +What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?,fulltext,"Amazon's Elastic Block Storage (EBS) service faced downtime in 2012 due to an electric outage at Amazon's U.S. East data center in Virginia. The outage was significant as it impacted the availability of services, causing inconvenience and discomfort to customers who regularly used the website. This highlighted the critical need for major upgrades to the U.S. East data center to prevent future service disruptions and maintain customer satisfaction.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \nDocument end\n\n\nDocument start\nThis Document belongs to the source github-recovery-codes.txt\nContent: e6b17-50e48\n----\ne5ab9-b2238\nDocument end\n\n\nDocument start\nThis Document belongs to the source https://en.wikipedia.org/wiki/Lord_Kelvin\nContent: ations would approach the deformation for the pure elastic material σ 0 / E {\\displaystyle \\sigma _{0}/E} with the difference decaying exponentially: ε \nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service faced several downtimes due to electric outages. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center and has been causing outage issues. These server crashes brought inconvenience and discomfort to the customers who use the website regularly. The company needed to do major upgrades on the U.S. East data center to prevent the services from going down in the future.",simple,0.9999999999,1.0,0.625,0.9240259114880655,0.797545745335543 +What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?,entity search+vector,"In 2012, Amazon's Elastic Block Storage (EBS) service faced a significant outage that impacted various online services and websites relying on Amazon Web Services (AWS). 
This outage led to widespread disruptions for companies using AWS, including popular websites like Reddit, Pinterest, and Netflix. The downtime caused by the EBS service issue resulted in financial losses for businesses, tarnished Amazon's reputation for reliability, and raised concerns about the dependency on cloud services for critical operations. The incident highlighted the importance of implementing robust backup and disaster recovery strategies when utilizing cloud services like EBS to mitigate such risks in the future.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. 
Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\ncommunities:\n- Alphabet is a major organization that serves as the parent company to several subsidiaries, including Verily, Waymo, Google Cloud, and YouTube. Ruth Porat holds the position of Chief Financial Officer (CFO) at Alphabet. 
Additionally, DeepMind is reported as part of Alphabet and also functions as a subsidiary. This network of companies highlights Alphabet's diverse interests in technology, healthcare, and media.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nrelationships:\n- Amazon USES Elastic Block Storage (Ebs)\n- Amazon LOCATED_AT U.S. East Data Center\nentities:\n- Technology:Elastic Block Storage (Ebs) \n- Electronic_Commerce_Firms \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Storage:Persistent Storage \n- Concept:E-Commerce \n- Organization:Icpen Finds ways to tackle consumer problems involving cross-border transactions.\n- Location:U.S. 
East Data Center \n- Organization:Morgan Grenfell Asset Management \n- Organization:Google Cloud \n- Concept:Threat_Of_New_Entrants Considered low due to low capital investment costs but high infrastructure and inventory requirements.\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Activity:Online Shopping \n - Country:India Emerging country with large population and growing disposable income.\n - Organization:International Consumer Protection And Enforcement Network \n - Product:Video Content \n - Country:China Emerging country with large population and growing disposable income.\n - Organization:Federal Trade Commission \n - Organization:Federal_Trade_Commission Regulates online advertising and the security of consumers’ personal information in the U.S.\n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Barnes And Noble\n - Organization:Barnes And Noble COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Barnes And Noble\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Concept:E-Commerce ENABLES Activity:Online Shopping\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon OPPORTUNITY 
Country:China\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:Federal_Trade_Commission REGULATES Electronic_Commerce_Firms\n\nDocument end\n""]","In 2012, Amazon's Elastic Block Storage (EBS) service faced several downtimes due to electric outages. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center and has been causing outage issues. These server crashes brought inconvenience and discomfort to the customers who use the website regularly. The company needed to do major upgrades on the U.S. East data center to prevent the services from going down in the future.",simple,0.0,0.0,0.0,0.9507036338911249,0.733409899743567 +What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?,graph+vector,"Amazon's Elastic Block Storage (EBS) service faced downtime in 2012 due to an electric outage at the U.S. East data center in Virginia, which is the largest and oldest data center for Amazon. This outage caused inconvenience and discomfort to customers who regularly used the website. The company had to undertake major upgrades on the U.S. East data center to prevent future service disruptions. Additionally, the increased expenditures on services like Kindle and Cloud prompted Amazon to plan a reduction in investments in these initiatives to focus on new projects generating more revenue and reflecting the company's value.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. 
Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. 
Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service faced several downtimes due to electric outages. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center and has been causing outage issues. These server crashes brought inconvenience and discomfort to the customers who use the website regularly. The company needed to do major upgrades on the U.S. 
East data center to prevent the services from going down in the future.",simple,0.9999999999,1.0,1.0,0.9240689240406725,0.8452230921841888 +What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?,graph+vector+fulltext,"Amazon's Elastic Block Storage (EBS) service faced downtime in 2012 due to an electric outage at the U.S. East data center in Virginia, which is the largest and oldest data center for Amazon. This outage caused inconvenience and discomfort for customers who regularly used the website. The company had to undertake major upgrades to prevent future service disruptions. Additionally, the increased expenditures on services like Kindle and Cloud prompted Amazon to plan a reduction in investments in these initiatives to focus on projects generating more revenue and reflecting the company's value.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. 
East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. 
Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service faced several downtimes due to electric outages. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center and has been causing outage issues. These server crashes brought inconvenience and discomfort to the customers who use the website regularly. The company needed to do major upgrades on the U.S. 
East data center to prevent the services from going down in the future.",simple,0.9999999999,1.0,1.0,0.9266902888856899,0.8449677841574839 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,vector,"The integrated focused cost leadership strategy helps competitors in the book industry target price-sensitive consumers by providing books that are not readily available at a lower price. By focusing on cost leadership and differentiation simultaneously, competitors like Barnes and Noble aim to increase profitability, gain market share, and offer a wide variety of products to compete against Amazon. This strategy allows them to cater to a specific market segment effectively and differentiate themselves from Amazon's offerings.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. 
Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. 
It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. 
In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n']",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive price.,simple,0.9999999999,1.0,0.75,0.9797332685421422,0.8178116358805732 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,fulltext,"The integrated focused cost leadership strategy helps competitors in the book industry target price-sensitive consumers by providing books that are not readily available at a lower price. By focusing on cost leadership and differentiation simultaneously, competitors like Barnes and Noble aim to increase profitability, gain market share, and offer a wide variety of products to compete against Amazon. This strategy allows them to attract customers looking for affordable options while also providing unique offerings not commonly found elsewhere, thus creating a competitive edge in the market.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. 
Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. 
Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. 
Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n']",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive price.,simple,0.9999999999,1.0,0.8888888888888888,0.9797517766983894,0.7761902121267035 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,entity search+vector,"The integrated focused cost leadership strategy helps competitors like Barnes and Noble compete with Amazon in the book industry by allowing them to offer competitive pricing while maintaining a differentiation focus. This strategy enables companies to provide cost-effective products and services tailored to specific market segments, attracting customers seeking both value and unique offerings. 
By combining cost leadership with differentiation, competitors can address the bargaining power of buyers effectively, offering a compelling value proposition that can challenge Amazon's market dominance in the book industry.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \ncommunities:\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all members of the course Bus 478, which is a seminar on business strategy. They are part of a group within this course, indicating a collaborative or team-based component to the class.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. 
It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Barnes And Noble USES Integrated Focused Cost Leadership/Differentiation\n- Barnes And Noble COMPETES_WITH Amazon\n- Bus 478 PROJECT Amazon.Com\nentities:\n- Integrated Focused Cost Leadership/Differentiation \n- Cost Leadership \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Electronic_Commerce_Firms \n- Course:Bus 478 Seminar on Business Strategy\n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Concept:E-Commerce \n- Organization:Morgan Grenfell Asset Management \noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Person:Jessica Zhang \n - Person:John Chen \n - Activity:Online Shopping \n - Person:Tristan Landrecht \n - Person:Jae Kim \n - Person:Jin Can Chen \n - Country:China Emerging country with large population and growing disposable income.\n - Country:India Emerging country with large population and growing disposable income.\n relationships:\n - Organization:Ebay USES Cost Leadership\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Wal-Mart USES Cost Leadership\n - 
Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Person:Jessica Zhang MEMBER Course:Bus 478\n - Course:Bus 478 GROUP_MEMBER Person:Jessica Zhang\n - Person:John Chen MEMBER Course:Bus 478\n - Course:Bus 478 GROUP_MEMBER Person:John Chen\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Concept:E-Commerce ENABLES Activity:Online Shopping\n - Person:Tristan Landrecht MEMBER Course:Bus 478\n - Course:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\n - Person:Jae Kim MEMBER Course:Bus 478\n - Course:Bus 478 GROUP_MEMBER Person:Jae Kim\n - Person:Jin Can Chen MEMBER Course:Bus 478\n - Course:Bus 478 GROUP_MEMBER Person:Jin Can Chen\n - Organization:Amazon OPPORTUNITY Country:China\n - Organization:Amazon OPPORTUNITY Country:India\n\nDocument end\n""]",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive price.,simple,0.9999999999,0.0,1.0,0.9831685457786773,0.6700925458407849 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,graph+vector,"The integrated focused cost leadership strategy helps competitors in the book industry target price-sensitive consumers by providing books that are not readily available at a lower price. 
This strategy allows competitors to offer unique products and services while maintaining cost efficiency, enabling them to compete with Amazon by attracting customers looking for affordable options and niche book selections.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. 
Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating 
e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. 
Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n']",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive 
price.,simple,0.9999999999,1.0,0.42857142857142855,0.9894747362032149,0.7453209000480977 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,graph+vector+fulltext,"The integrated focused cost leadership strategy helps competitors in the book industry target price-sensitive consumers by providing books that are not readily available at a lower price. By combining cost leadership with differentiation, competitors can offer unique products at competitive prices, aiming to increase profitability and gain market share. This strategy allows them to compete with Amazon by offering a variety of products and services that cater to specific consumer needs while maintaining cost efficiency.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. 
Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan 
Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan 
Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n']",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive price.,simple,0.9999999999,1.0,1.0,0.9894149592194954,0.6202041786772748 +How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?,global search+vector+fulltext,"The integrated focused cost leadership strategy helps competitors like Barnes and Noble compete with Amazon in the book industry by allowing them to offer a unique value proposition. 
By combining cost leadership with differentiation, Barnes and Noble can focus on specific market segments or niches where they can provide distinct products or services at competitive prices. This strategy enables them to target specific customer needs effectively while maintaining cost efficiency, thereby creating a competitive advantage against Amazon's broader market approach.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. 
East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. 
To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. 
The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nJin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all members of the course Bus 478, which is a seminar on business strategy. They are part of a group within this course, indicating a collaborative or team-based component to the class.\n----\nThe Etnow Poll event projected that InterGlobe Aviation, a prominent player in the aviation industry, was expected to announce a financial loss of Rs 660 crore. 
This forecast highlights the financial challenges faced by the company, possibly due to factors such as fluctuating fuel prices, operational costs, or market conditions impacting the aviation sector. The anticipation of such a significant loss underscores the need for strategic adjustments and financial planning to navigate the economic pressures within the industry.\nDocument end\n""]",The integrated focused cost leadership/differentiation strategy helps competitors compete with Amazon in the book industry by targeting price-sensitive consumers and providing books that are not readily or commonly available at a less expensive price.,simple,0.9999999999,0.0,0.25,0.9739451880358826,0.6658696336624164 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,vector,"Barnes and Noble, as America's largest book retailer, employs an integrated focused cost leadership/differentiation strategy in the e-commerce industry. They target price-sensitive consumers by offering books that are not readily available at a lower price. This strategy allows them to compete with Amazon by providing unique products and competitive pricing to gain market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. 
Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. 
Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']",The answer to given question is not present in context,simple,0.0,1.0,1.0,0.9732296166055742,0.17532420230388426 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,fulltext,"Barnes and Noble, as America's largest book retailer, employs an integrated focused cost leadership/differentiation strategy in the e-commerce industry. They target price-sensitive consumers by offering books that are not readily available at a lower price. This strategy allows them to compete with Amazon by providing unique products and competitive pricing to gain market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']",The answer to given question is not present in context,simple,0.9999999999,1.0,0.7142857142857143,0.9731916159213346,0.17537330046954663 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,entity search+vector,"Barnes and Noble employs an integrated focused cost leadership/differentiation strategy in the e-commerce industry. This strategy involves offering unique products or services while also maintaining competitive pricing to attract customers. By combining elements of cost leadership and differentiation, Barnes and Noble aims to stand out from competitors like Amazon and provide value to its customers in the online retail market.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. 
Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. 
In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. 
The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nrelationships:\n- E-Commerce ENABLES Online Shopping\n- Barnes And Noble COMPETES_WITH Amazon\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon BENEFIT Online Shopping\nentities:\n- Concept:E-Commerce \n- Electronic_Commerce_Firms \n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Resource:Case Studies Customer success stories across industries.\n- Activity:Online Shopping \n- Online_Shopping_Experience \n- Online_Shopping \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Online_Purchasing \n- Use case:Industries & Use Cases Fraud detection, knowledge graphs, financial services, and more\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Organization:Neo4J A graph database management system.\n - Concept:Web Security \n - Group:Consumers \n - Group:Baby Boomers \n - Technology:Online Payment Methods \n - Technology:3D Virtual Technology \n - Technology:Smartphones \n - Concept:Modern Tech Support \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Neo4J PROVIDES Resource:Case Studies\n - Concept:Web Security FACILITATES Activity:Online Shopping\n - Group:Consumers PREFERENCE Activity:Online Shopping\n - Group:Baby Boomers LESS_INVOLVED Activity:Online Shopping\n - Technology:Online 
Payment Methods ENHANCES Activity:Online Shopping\n - Technology:3D Virtual Technology FUTURE Activity:Online Shopping\n - Technology:Smartphones FACILITATES Activity:Online Shopping\n - Concept:Modern Tech Support FACILITATES Activity:Online Shopping\n\nDocument end\n""]",The answer to given question is not present in context,simple,0.0,1.0,0.2,0.9732296166055742,0.17267541840546935 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,graph+vector,"Barnes and Noble, a major competitor of Amazon in the e-commerce industry, employs an integrated focused cost leadership/differentiation strategy. This strategy targets price-sensitive consumers by offering books that are not readily available at a lower price. By combining elements of cost leadership and differentiation, Barnes and Noble aims to increase profitability and compete effectively with Amazon for market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. 
Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Modern Tech Support\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Sh\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Modern Tech Support\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Sh\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon 1.pdf\nContent: ivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: ivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 
478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n']",The answer to given question is not present in context,simple,0.9999999999,1.0,1.0,0.9507042440470773,0.17496629219404303 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,graph+vector+fulltext,"Barnes and Noble employs an integrated focused cost leadership/differentiation strategy in the e-commerce industry. This strategy targets price-sensitive consumers by offering books that are not readily available at a lower price. By combining elements of cost leadership and differentiation, Barnes and Noble competes intensively with Amazon for market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Modern Tech Support\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Sh\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Modern Tech Support\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Sh\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon 1.pdf\nContent: ivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: ivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 
478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n']",The answer to given question is not present in context,simple,0.9999999999,1.0,1.0,0.973263566345624,0.17508211810613783 +What is the competitive strategy of Barnes and Noble in the e-commerce industry?,global search+vector+fulltext,"Barnes and Noble in the e-commerce industry employs an integrated focused cost leadership/differentiation strategy. This strategy combines elements of cost leadership and differentiation to set itself apart from competitors like Amazon and eBay. By focusing on cost efficiency while also offering unique and differentiated products or services, Barnes and Noble aims to carve out a competitive position in the market.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. 
It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. 
E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. 
This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe integration of 3D virtual technology is revolutionizing the online shopping experience, offering a more immersive and engaging way for consumers to explore products. 
This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which streamline and simplify the transaction process, making online shopping more accessible and convenient. Furthermore, emerging markets, notably China and India, are poised for significant growth in the online shopping sector. These countries, characterized by their vast populations and rising disposable incomes, present lucrative opportunities for expansion and development in the e-commerce industry.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]",The answer to given question is not present in context,simple,0.0,1.0,0.3333333333333333,0.9732296166055742,0.17212539693538745 +Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?,vector,Online retailers like Amazon should be cautious about disclosing online sales taxes in their advertisements because different sales tax regulations across countries can lead to customer confusion. Failure to accurately disclose these taxes can result in customer dissatisfaction and potential legal issues. 
Transparency in online sales tax information is crucial to maintain trust with customers and ensure compliance with international policies on online distribution.,"['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. 
E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. 
However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 
4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \nDocument end\n\n\nDocument start\nThis Document belongs to the source Alphabet_Q1_stock.pdf\nContent: $6.69 billion vs. $6.6 billion, according to StreetAccount. • Google Cloud revenue: $7.45 billion vs. $7.49 billion, according to StreetAccount. • Traffic acquisition costs (TAC): $11.72 billion vs. $11.78 billion, according to StreetAccount. Alphabets revenue rose 3% to $69.79 billion from $68 billion a year earlier, according to the earnings report. The company is mired in a multi-quarter stretch of low single-digit revenue growth after almost two decades of consistent and rapid expansion. With fears of a recession building since last year, advertisers have been reeling in online marketing budgets, wreaking havoc on Google, Facebook and others. 
\nDocument end\n']","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.3333333333333333,0.9977437671677444,0.5751075168304566 +Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?,fulltext,"Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales tax regulations can vary between countries or regions. Failure to disclose these taxes can lead to customer confusion and dissatisfaction. Clear disclosure helps to maintain transparency and trust with customers, ensuring they are fully informed about the total cost of their purchases. This transparency is crucial for customer satisfaction and compliance with legal requirements in different jurisdictions.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. 
Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. 
Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. 
While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. 
Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. 
Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \nDocument end\n\n\nDocument start\nThis Document belongs to the source Alphabet_Q1_stock.pdf\nContent: $6.69 billion vs. $6.6 billion, according to StreetAccount. • Google Cloud revenue: $7.45 billion vs. 
$7.49 billion, according to StreetAccount. • Traffic acquisition costs (TAC): $11.72 billion vs. $11.78 billion, according to StreetAccount. Alphabets revenue rose 3% to $69.79 billion from $68 billion a year earlier, according to the earnings report. The company is mired in a multi-quarter stretch of low single-digit revenue growth after almost two decades of consistent and rapid expansion. With fears of a recession building since last year, advertisers have been reeling in online marketing budgets, wreaking havoc on Google, Facebook and others. \nDocument end\n']","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.7777777777777778,0.9999999999999997,0.9401644330221344 +Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?,entity search+vector,Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales tax rates can lead to customer confusion. Transparency in disclosing these taxes helps to avoid misunderstandings and ensures that customers are aware of the total cost of their purchases. Failure to disclose online sales taxes can result in customer dissatisfaction and potential legal issues related to misleading advertising practices.,"[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. 
Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n- providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \ncommunities:\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. 
It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Generation X and Y, known for their technological adeptness, are demographic groups that actively engage in online purchasing.\n- The Federal Trade Commission, an organization responsible for regulating online advertising and ensuring the security of consumers' personal information in the United States, oversees electronic commerce firms.\nrelationships:\n- Federal_Trade_Commission REGULATES Electronic_Commerce_Firms\n- E-Commerce ENABLES Online Shopping\n- Consumers PREFERENCE Online Shopping\nentities:\n- Online_Sales_Taxes \n- Organization:Federal_Trade_Commission Regulates online advertising and the security of consumers’ personal information in the U.S.\n- Online_Purchasing \n- Online_Shopping \n- Activity:Online Shopping \n- Concept:E-Commerce \n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Electronic_Commerce_Firms \n- Online_Shopping_Experience \n- Group:Consumers \noutside:\n nodes:\n - Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n - Location:China_And_India Emerging countries with large populations and growing disposable income.\n - Group:Baby Boomers \n - Concept:3D_Virtual_Technology Technology that will take online shopping to new heights.\n - Concept:Modern Tech Support \n - Concept:Web Security \n - Concept:Electronic_Devices Devices such as smartphones that deliver easy and convenient transaction processes.\n - Technology:3D Virtual Technology \n - Technology:Online Payment Methods \n - Demographic_group:Generation_X_And_Y Technologically adept generations that engage more in online 
purchasing.\n relationships:\n - Organization:Amazon DISCLOSES Online_Sales_Taxes\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Location:China_And_India OPPORTUNITY_FOR Online_Shopping\n - Group:Baby Boomers LESS_INVOLVED Activity:Online Shopping\n - Concept:3D_Virtual_Technology ENHANCES Online_Shopping\n - Concept:Modern Tech Support FACILITATES Activity:Online Shopping\n - Concept:Web Security FACILITATES Activity:Online Shopping\n - Concept:Electronic_Devices FACILITATES Online_Shopping\n - Technology:3D Virtual Technology FUTURE Activity:Online Shopping\n - Technology:Online Payment Methods ENHANCES Activity:Online Shopping\n - Demographic_group:Generation_X_And_Y ENGAGES_IN Online_Purchasing\n\nDocument end\n""]","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.3333333333333333,0.9999999999999997,0.5748866568875494 +Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?,graph+vector,"Online retailers like Amazon should be cautious about disclosing online sales taxes in their advertisements to avoid causing confusion to customers. Different sales tax regulations in various regions can lead to discrepancies and misunderstandings among customers. By being transparent about online sales taxes, retailers can maintain trust and clarity in their transactions, ensuring a positive customer experience and compliance with legal requirements.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. 
Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Electronic_Commerce_Firms\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Smartphone_App\nActivity:Online Shopping\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Web Security\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal_Trade_Commission (Regulates online advertising and the security of consumers’ personal information in the U.S.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon 
COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal_Trade_Commission REGULATES :Electronic_Commerce_Firms\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. 
Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability,\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Electronic_Commerce_Firms\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Smartphone_App\nActivity:Online Shopping\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Web Security\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal_Trade_Commission (Regulates online advertising and the security of consumers’ personal information in the U.S.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon 
COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal_Trade_Commission REGULATES :Electronic_Commerce_Firms\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n']","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.5555555555555556,0.9144513874174006,0.4722205528155351 +Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?,graph+vector+fulltext,"Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales tax rates can lead to customer confusion. 
When advertising products to other countries via online platforms, clear disclosure of online sales taxes is crucial to avoid customer misunderstandings and ensure transparency in pricing. Failure to disclose these taxes can result in customer dissatisfaction and potential legal issues related to misleading advertising practices.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. 
Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. 
However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nConcept:E-Commerce\nConcept:Modern Tech Support\nConcept:Web Security\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal Trade Commission\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video 
Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal Trade Commission REGULATES Organization:Amazon\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. 
Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. 
Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nConcept:E-Commerce\nConcept:Modern Tech Support\nConcept:Web Security\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal Trade Commission\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video 
Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal Trade Commission REGULATES Organization:Amazon\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source Alphabet_Q1_stock.pdf\nContent: Text Content:\n Alphabet reports revenue and earnings beat for first quarter PUBLISHED TUE, APR 25 20239:00 AM EDTUPDATED TUE, APR 25 20239:25 PM EDT Alphabet reported first-quarter results on Tuesday that exceeded analysts estimates. The stock jumped over 4% in extended trading before paring its gains. The company also said its board authorized a $70 billion share buyback. Here are the key numbers: • Earnings: $1.17 per share vs. $1.07 per share expected, according to Refinitiv. • Revenue: $69.79 billion vs. $68.9 billion expected, according to Refinitiv. The beat on the top and bottom lines breaks a string of four straight quarters in which the company missed consensus estimates. • YouTube advertising revenue: $6.69 billion vs. $6.6 billion, according to StreetAccount. 
\n----\nOn its investor call, finance chief Ruth Porat said due to the challenging economic environment the outlook remains uncertain. Ad revenue beat analyst expectations, but fell from the year prior to $54.55 billion. YouTube ad revenue stayed in line with analyst expectations, also declining from a year ago. In addition to the overall pullback in ad spending, YouTube is also facing heightened competition from TikTok in short-form videos. To grapple with the recent advertising weakness, Google has had to make its most extreme cuts in company history, including laying off 12,000 employees — about 6% of its workforce — in January. This month, CFO Ruth Porat announced “multi-year” cuts to things like real estate, employee services and equipment. Alphabet reported $2.6 billion in charges related to the layoffs and office space reduction during the quarter. Net income fell to $15.05 billion, or $1.\n----\nEntities:\nOrganization:Alphabet\nOrganization:Deepmind\nOrganization:Google\nOrganization:Google Cloud\nOrganization:Verily\nOrganization:Waymo\nOrganization:Youtube\nPerson:Ruth Porat\n----\nRelationships:\nOrganization:Alphabet CFO Person:Ruth Porat\nOrganization:Alphabet SUBSIDIARY Organization:Google Cloud\nOrganization:Alphabet SUBSIDIARY Organization:Verily\nOrganization:Alphabet SUBSIDIARY Organization:Waymo\nOrganization:Alphabet SUBSIDIARY Organization:Youtube\nOrganization:Deepmind SUBSIDIARY Organization:Alphabet\nOrganization:Google SUBSIDIARY Organization:Alphabet\nOrganization:Verily SUBSIDIARY Organization:Alphabet\nOrganization:Waymo SUBSIDIARY Organization:Alphabet\nDocument end\n']","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.5714285714285714,0.999998130852297,0.934309972514247 +Why should online retailers like Amazon be careful about 
disclosing online sales taxes in their advertisements?,global search+vector+fulltext,"Online retailers like Amazon should be cautious about disclosing online sales taxes in their advertisements to maintain transparency and compliance with regulations. By clearly stating the inclusion of sales taxes, they build trust with customers and avoid potential legal issues related to false advertising or misleading practices. Additionally, disclosing online sales taxes helps prevent any misunderstandings or surprises for customers during the checkout process, leading to a smoother shopping experience and reducing the risk of customer dissatisfaction or complaints.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. 
To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. 
To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. 
To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe Federal Trade Commission, an organization responsible for regulating online advertising and ensuring the security of consumers' personal information in the United States, oversees electronic commerce firms.\n----\nThe Federal Trade Commission (FTC) is a key regulatory body in the United States tasked with overseeing electronic commerce firms. Its primary responsibilities include regulating online advertising practices and ensuring the security and protection of consumers' personal information. The FTC plays a crucial role in maintaining fair and secure digital marketplaces by enforcing laws and guidelines that govern how businesses operate online, thereby safeguarding consumer interests and promoting trust in electronic commerce.\n----\nThe Federal Trade Commission (FTC) is a regulatory body in the United States tasked with overseeing electronic commerce firms. Its primary responsibilities include regulating online advertising practices and ensuring the security and protection of consumers' personal information. 
The FTC plays a crucial role in maintaining fair and secure digital marketplaces by enforcing laws and guidelines that govern how businesses operate online, thereby safeguarding consumer interests.\n----\nThe Federal Trade Commission (FTC) is a key regulatory body in the United States tasked with overseeing electronic commerce firms. Its primary responsibilities include regulating online advertising practices and ensuring the security and protection of consumers' personal information. The FTC plays a crucial role in maintaining fair and secure digital marketplaces, safeguarding consumer rights, and promoting trust in online transactions.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. 
The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. 
These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]","Online retailers like Amazon should be careful about disclosing online sales taxes in their advertisements because different sales taxes can cause confusion to customers, especially in the context of cross-border transactions.",simple,0.9999999999,1.0,0.0,0.9788114251760524,0.3457883227733063 +How threatening are substitute products to Amazon's patented tech and customer service?,vector,"Substitute products pose a high threat to Amazon despite its patented technology and customer service. While Amazon's unique features like 1-Click Ordering differentiate it, there are many alternatives offering similar products. This situation diminishes Amazon's competitive advantage in product offerings. However, Amazon maintains an advantage in customer service and convenience, which helps mitigate the threat of substitute products to some extent.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. 
As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. 
In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. 
As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. 
In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \nDocument end\n']","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.9999999999,1.0,0.8333333333333334,0.951196677614003,0.6950129184145829 +How threatening are substitute products to Amazon's patented tech and customer service?,fulltext,"Substitute products pose a high threat to Amazon despite its patented technology and customer service. While Amazon's unique features like 1-Click Ordering differentiate it, there are many alternatives offering similar products. This situation diminishes Amazon's competitive advantage in product offerings. However, Amazon maintains an advantage in customer service and convenience, which helps mitigate the threat of substitutes to some extent.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \nDocument end\n']","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. 
However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.9999999999,1.0,1.0,0.9511736417752749,0.6802307199189462 +How threatening are substitute products to Amazon's patented tech and customer service?,entity search+vector,"Substitute products can pose a moderate threat to Amazon's patented technology and customer service. While Amazon has established a strong presence and loyal customer base, competitors offering similar products or services could potentially lure customers away. Amazon's patented technology and exceptional customer service are key differentiators that help mitigate this threat, but continuous innovation and customer-centric strategies are essential to maintain a competitive edge in the market.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. 
It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Prabhakar Raghavan is an employee at Google, where Sundar Pichai serves as the CEO. Google has developed several products, including Bard, Pixel, and Android. The company has established partnerships with major organizations such as Samsung and Apple. Additionally, Samsung is considering changes in its relationship with Google.\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. 
Jeff Benzos, the founder, played a pivotal role in its development and expansion.\nrelationships:\n- Amazon COMPETITOR Wal-Mart\n- Amazon COMPETES_WITH Wal-Mart\n- Federal_Trade_Commission REGULATES Electronic_Commerce_Firms\nentities:\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Organization:Amazon Cooperation Treaty Organization \n- Company:Amazon Aws \n- Electronic_Commerce_Firms \n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Group:Consumers \n- Organization:Apple \n- Organization:Federal_Trade_Commission Regulates online advertising and the security of consumers’ personal information in the U.S.\n- Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n- Online_Shopping \noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Activity:Online Shopping \n - Organization:Federal Trade Commission \n - Technology:Elastic Block Storage (Ebs) \n - Country:China Emerging country with large population and growing disposable income.\n - Technology:Cloud \n - Product:Video Content \n - Country:India Emerging country with large population and growing disposable income.\n - Organization:International Consumer Protection And Enforcement Network \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Barnes And Noble\n - Organization:Barnes And Noble COMPETES_WITH Organization:Amazon\n - Organization:Amazon 
COMPETES_WITH Organization:Barnes And Noble\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Group:Consumers PREFERENCE Activity:Online Shopping\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:Amazon USES Technology:Elastic Block Storage (Ebs)\n - Organization:Amazon OPPORTUNITY Country:China\n - Organization:Amazon INVESTS_IN Technology:Cloud\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n\nDocument end\n""]","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.9999999999,0.0,0.5555555555555556,0.9668322495233318,0.5657109211952066 +How threatening are substitute products to Amazon's patented tech and customer service?,graph+vector,"Substitute products pose a high threat to Amazon despite its patented technology like 1-Click Ordering. While these unique features differentiate Amazon, there are many alternatives offering similar products. However, Amazon's competitive advantage lies in its quality customer service and convenience rather than absolute product uniqueness.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. 
However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 
4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. 
For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nConcept:E-Commerce\nConcept:Modern Tech Support\nConcept:Web Security\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal Trade Commission\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT 
Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal Trade Commission REGULATES Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. 
This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nConcept:E-Commerce\nConcept:Modern Tech Support\nConcept:Web Security\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Federal Trade Commission\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Web Security FACILITATES Activity:Online Shopping\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT 
Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Federal Trade Commission REGULATES Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n']","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.9999999999,1.0,1.0,0.0,0.7728515969055618 +How threatening are substitute products to Amazon's patented tech and customer service?,graph+vector+fulltext,"Substitute products pose a high threat to Amazon despite its patented technology like 1-Click Ordering. While these technologies differentiate Amazon, there are many alternatives offering similar products. However, Amazon's competitive advantage lies in its quality customer service and convenience rather than absolute product uniqueness.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. 
Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon 
COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam D\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon 
COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam D\nDocument end\n']","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. 
However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.9999999999,1.0,1.0,0.8926176723855943,0.8618653144968493 +How threatening are substitute products to Amazon's patented tech and customer service?,global search+vector+fulltext,"Substitute products can pose a moderate threat to Amazon's patented technology and customer service. While Amazon's patents provide a level of protection for its innovations, substitute products could still impact its market share and competitive position. Amazon's strong customer service reputation may mitigate some of this threat by fostering customer loyalty. However, continuous innovation and strategic differentiation are crucial for Amazon to stay ahead of potential substitutes in the rapidly evolving e-commerce landscape.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. 
To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nNeo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. 
The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. 
Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe summaries collectively highlight a network of prominent technology companies and their products, partnerships, and organizational structures. Google, led by CEO Sundar Pichai, is a key player with products like Bard, Pixel, and Android, and partnerships with major companies such as Samsung and Apple. Samsung is contemplating changes in its relationship with Google. Alphabet, Google's parent company, encompasses a diverse range of subsidiaries, including Verily, Waymo, Google Cloud, YouTube, and DeepMind, with Ruth Porat as CFO. This structure underscores Alphabet's broad interests in technology, healthcare, and media.\n\nMicrosoft, another major organization, has developed Bing and faces competition from Gemini Ai. OpenAI is noted for its product, ChatGPT. Neo4j is a comprehensive graph database management system offering a suite of products and services, including Neo4j Graph Data Science, Neo4j GraphQL Library, and Neo4j Bloom, among others. 
It supports learning through Graphacademy and hosts events like the Neo4j Events Hub and Graphsummit. Neo4j Auradb, a cloud-based service, provides advanced graph database and analytics capabilities with enterprise-grade security.\n\nOverall, these summaries depict a vibrant ecosystem of technology companies, each contributing to advancements in their respective fields through innovative products, strategic partnerships, and a commitment to development and learning.\n----\nThe summaries collectively highlight a network of prominent technology companies and their products, partnerships, and organizational structures. Google, led by CEO Sundar Pichai, is a key player with products like Bard, Pixel, and Android, and partnerships with major companies such as Samsung and Apple. Samsung is contemplating changes in its relationship with Google. Alphabet, Google's parent company, encompasses a diverse range of subsidiaries, including Verily, Waymo, Google Cloud, YouTube, and DeepMind, with Ruth Porat as CFO. This structure underscores Alphabet's broad interests in technology, healthcare, and media.\n\nMicrosoft, another major organization, has developed Bing and faces competition from Gemini Ai. OpenAI is noted for its product, ChatGPT. Neo4j is a comprehensive graph database management system offering a suite of products and services, including Neo4j Graph Data Science, Neo4j GraphQL Library, and Neo4j Bloom, among others. It supports learning through Graphacademy and hosts events like the Neo4j Events Hub and Graphsummit. 
Neo4j Auradb, a cloud-based service, provides advanced graph database and analytics capabilities with enterprise-grade security.\n\nOverall, these summaries depict a vibrant ecosystem of technology companies, each contributing to advancements in their respective fields through innovative products, strategic partnerships, and a commitment to development and education.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\nDocument end\n""]","The threat of substitutes for Amazon is high. There are many alternatives providing the same products and services, which could reduce Amazon's competitive advantage. However, Amazon does have the advantage when it comes to the quality of customer service and convenience provided.",reasoning,0.0,0.0,0.3,0.9667636548197255,0.8461425352478676 +What was the 2012 issue with Amazon's EBS and its proposed fix?,vector,"In 2012, Amazon faced issues with its Elastic Block Storage (EBS) service due to an electric outage at the U.S. 
East data center in Virginia, the largest and oldest data center causing the outages. To prevent future service disruptions, Amazon planned major upgrades for the U.S. East data center. This upgrade was necessary to avoid inconveniences and discomfort for regular website users caused by server crashes.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. 
In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. 
It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. 
Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source github-recovery-codes.txt\nContent: e6b17-50e48\n----\ne5ab9-b2238\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service experienced several outages due to an electric outage. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center. 
The proposed fix for this issue is to perform major upgrades on the U.S. East data center to prevent the services from going down in the future.",reasoning,0.9999999999,1.0,1.0,0.9348951275957792,0.8232985491060043 +What was the 2012 issue with Amazon's EBS and its proposed fix?,fulltext,"In 2012, Amazon faced issues with its Elastic Block Storage (EBS) service due to an electric outage at the U.S. East data center in Virginia. The outage was primarily caused by problems in this data center, which is the largest and oldest, leading to service disruptions. To prevent such incidents in the future and avoid inconvenience to customers, Amazon planned major upgrades for the U.S. East data center.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. 
East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. 
Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. 
Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source github-recovery-codes.txt\nContent: e6b17-50e48\n----\ne5ab9-b2238\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service experienced several outages due to an electric outage. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center. The proposed fix for this issue is to perform major upgrades on the U.S. East data center to prevent the services from going down in the future.",reasoning,0.9999999999,1.0,1.0,0.9348951275957792,0.9967040798900392 +What was the 2012 issue with Amazon's EBS and its proposed fix?,entity search+vector,"The 2012 issue with Amazon's Elastic Block Storage (EBS) was related to service downtime caused by an electric outage at Amazon's U.S. East data center in Virginia. To address this problem and prevent future service disruptions, Amazon planned major upgrades on the U.S. East data center. These upgrades were crucial to avoid inconveniences and discomfort for customers relying on Amazon's services.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. 
Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n- influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n- \t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. 
The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Alphabet is a major organization that serves as the parent company to several subsidiaries, including Verily, Waymo, Google Cloud, and YouTube. Ruth Porat holds the position of Chief Financial Officer (CFO) at Alphabet. Additionally, DeepMind is reported as part of Alphabet and also functions as a subsidiary. This network of companies highlights Alphabet's diverse interests in technology, healthcare, and media.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. 
The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Location:U.S. East Data Center \n- Technology:Elastic Block Storage (Ebs) \n- Company:Amazon Aws \n- Organization:Amazon Cooperation Treaty Organization \n- Organization:Google Cloud \n- Product:Deployment Center Get started. Download, integrate, and deploy.\n- Azure \n- Technology:Cloud \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Concept:Modern Tech Support \nrelationships:\n- Amazon LOCATED_AT U.S. East Data Center\n- Amazon USES Elastic Block Storage (Ebs)\n- Amazon INVESTS_IN Cloud\noutside:\n nodes:\n - Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Activity:Online Shopping \n - Organization:Federal Trade Commission \n - Software:Authorization Library Based on Azure or other 3rd party authorization stack.\n - Organization:International Consumer Protection And Enforcement Network \n - Country:India Emerging country with large population and growing disposable income.\n - Organization:Alphabet \n - Organization:Neo4J A graph database management system.\n relationships:\n - Organization:Amazon COMPETITOR Organization:Barnes And Noble\n - Organization:Barnes And Noble COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Barnes And Noble\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH 
Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Concept:Modern Tech Support FACILITATES Activity:Online Shopping\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Software:Authorization Library BASED_ON Azure\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:Alphabet SUBSIDIARY Organization:Google Cloud\n - Company:Amazon Aws CLOUD_PROVIDER Organization:Neo4J\n\nDocument end\n""]","In 2012, Amazon's Elastic Block Storage (EBS) service experienced several outages due to an electric outage. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center. The proposed fix for this issue is to perform major upgrades on the U.S. East data center to prevent the services from going down in the future.",reasoning,0.9999999999,1.0,0.6666666666666666,0.9683981941081358,0.8442264233747876 +What was the 2012 issue with Amazon's EBS and its proposed fix?,graph+vector,"In 2012, Amazon faced issues with its Elastic Block Storage (EBS) service due to an electric outage at the U.S. East data center in Virginia. The outage was primarily caused by problems in this data center, which is the largest and oldest, leading to service disruptions. To prevent such incidents in the future and avoid inconveniencing customers, Amazon planned major upgrades for the U.S. East data center.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. 
Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click 
Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click 
Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source github-recovery-codes.txt\nContent: Text Content:\ne6b17-50e48\n----\ne5ab9-b2238\n----\nEntities:\nUnknown:25A0D-3Bed5Df476-8C8Ade6B17-50E48F0A6C-E36E6\nUnknown:5E24C-D96Aad5029-Ca4B7Bc7F7-Edaeee5Ab9-B2238\n----\nRelationships:\n\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service experienced several outages due to an electric outage. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center. The proposed fix for this issue is to perform major upgrades on the U.S. 
East data center to prevent the services from going down in the future.",reasoning,0.9999999999,1.0,1.0,0.9348775117397056,0.9965557282468844 +What was the 2012 issue with Amazon's EBS and its proposed fix?,graph+vector+fulltext,"In 2012, Amazon faced issues with its Elastic Block Storage (EBS) service due to an electric outage at the U.S. East data center in Virginia. The outage was primarily caused by problems at this data center, which is the largest and oldest, leading to service disruptions. To prevent such incidents in the future and avoid inconvenience to customers, Amazon planned major upgrades for the U.S. East data center.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. 
East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. 
Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n to do this thanks to the power of technological progress. 
Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. 
However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click 
Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source github-recovery-codes.txt\nContent: Text Content:\ne6b17-50e48\n----\ne5ab9-b2238\n----\nEntities:\nUnknown:25A0D-3Bed5Df476-8C8Ade6B17-50E48F0A6C-E36E6\nUnknown:5E24C-D96Aad5029-Ca4B7Bc7F7-Edaeee5Ab9-B2238\n----\nRelationships:\n\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service experienced several outages due to an electric outage. The main problem originated from Amazon's U.S. East data center in Virginia, which is the biggest and oldest data center. The proposed fix for this issue is to perform major upgrades on the U.S. 
East data center to prevent the services from going down in the future.",reasoning,0.9999999999,1.0,1.0,0.9349194337018917,0.9466832748148085 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,vector,"Amazon's growth from a small online bookstore to a $48 billion retail giant was primarily fueled by a strategic shift in their business approach. Initially focusing on being the ""world's largest bookstore,"" Amazon later transitioned to a mergers and acquisitions strategy, expanding into various product categories and leveraging technological advancements. This shift allowed Amazon to evolve into a comprehensive online platform offering a wide range of products, emphasizing low prices, convenience, selection, and availability. Additionally, Amazon's ability to adapt to changing consumer behaviors, exploit emerging markets like China and India, and capitalize on the power of e-commerce and technological progress were key factors in its remarkable growth trajectory.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n']","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,1.0,1.0,0.9584663569619937,0.6703444617276766 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,fulltext,"Amazon's growth from a small online bookstore to a $48 billion retail giant was primarily fueled by a strategic shift in their corporate-level strategy towards mergers and acquisitions. This approach, coupled with a focus on technological progress, allowed Amazon to transform into a diversified platform offering a wide range of products across various categories. By leveraging computational power, bandwidth, data storage technologies, and investing in distribution centers, Amazon expanded its offerings, improved efficiency, and enhanced customer experience, ultimately driving its significant growth and success in the retail industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n']","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,1.0,1.0,0.961033547001144,0.9949075988542893 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,entity search+vector,"Amazon's growth from a small bookstore to a $48 billion retail giant can be attributed to several key strategies. Initially starting as an online bookstore, Amazon expanded its product offerings across more than forty categories, leveraging a customer-centric approach, continuous innovation, and strategic investments in technology. Jeff Bezos, the founder, played a pivotal role in driving Amazon's growth by focusing on customer experience, operational efficiency, and global market expansion. 
Additionally, Amazon's emphasis on long-term vision, diversification into cloud services, video content, and Kindle products, along with investments in distribution centers, contributed significantly to its rapid growth and success in the e-commerce industry.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \ncommunities:\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. 
Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n- Concept:E-Commerce \n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Electronic_Commerce_Firms \n- Online_Purchasing \n- Activity:Online Shopping \n- Online_Shopping \nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon COMPETITOR Wal-Mart\n- Amazon COMPETES_WITH Wal-Mart\n- Amazon BENEFIT Online Shopping\n- Barnes And Noble COMPETES_WITH Amazon\n- E-Commerce ENABLES Online Shopping\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product 
breadth, geographic market coverage, and scope of business.\n - Country:India Emerging country with large population and growing disposable income.\n - Technology:Elastic Block Storage (Ebs) \n - Country:China Emerging country with large population and growing disposable income.\n - Product:Video Content \n - Technology:Cloud \n - Organization:Federal Trade Commission \n - Product:Kindle \n - Location:Distribution Centers \n - Organization:International Consumer Protection And Enforcement Network \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:Amazon USES Technology:Elastic Block Storage (Ebs)\n - Organization:Amazon OPPORTUNITY Country:China\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon INVESTS_IN Technology:Cloud\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:Amazon INVESTS_IN Product:Kindle\n - Organization:Amazon INVESTS_IN Location:Distribution Centers\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n\nDocument end\n""]","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. 
Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,0.0,0.5294117647058824,0.9620756699751561,0.9436288774421768 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,graph+vector,"Amazon's growth from a small online bookstore to a $48 billion retail giant was primarily fueled by a strategic shift towards mergers and acquisitions, coupled with a focus on technological progress. Initially starting as an online bookstore, Amazon transformed its corporate-level strategy into an approach that emphasized mergers and acquisitions, allowing it to expand rapidly. Additionally, Amazon leveraged advancements in computational power, bandwidth, and data storage technologies to diversify its product offerings, enhance customer experience, and scale its operations across various categories. These strategic moves enabled Amazon to evolve into a massive retail machine with a broad market presence and a valuation of $48 billion.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:\nDocument end\n']","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. 
Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,1.0,0.5333333333333333,0.9610932535289535,0.8788590318886854 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,graph+vector+fulltext,"Amazon's growth from a small bookstore to a $48 billion retail giant was primarily fueled by a strategic shift in their business approach. Initially focusing on online book sales, Amazon later transitioned to a mergers and acquisitions strategy, expanding into various product categories and leveraging technological advancements. This shift allowed Amazon to evolve into a comprehensive online platform offering a wide range of products, emphasizing low prices, convenience, selection, and availability. Additionally, Amazon's ability to adapt to emerging markets like China and India, along with investments in infrastructure and data storage technologies, played a crucial role in their remarkable growth trajectory.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:\nDocument end\n']","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. 
Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,1.0,1.0,0.96201214858725,0.8253596616571912 +What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?,global search+vector+fulltext,"Amazon's growth from a small online bookstore to a $48 billion retail giant can be attributed to several key strategies. Initially, Amazon focused on providing a wide selection of products and convenient online shopping experiences. Jeff Bezos, the founder, emphasized customer satisfaction, efficient logistics, and continuous innovation. Amazon's strategic use of technology, like 1-Click Ordering and cloud services, also played a crucial role in streamlining operations and enhancing customer service. Additionally, Amazon's expansion into various product categories, investment in emerging markets like China, and strategic partnerships contributed significantly to its remarkable growth trajectory.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. 
Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. 
Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. 
The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]","Amazon's growth from a small bookstore to a $48 billion retail giant was largely due to its transformation of corporate-level strategy into a mergers and acquisitions approach. 
Additionally, Amazon's goal to be a place where a customer can purchase anything online, and the advancements in technology such as computational power, bandwidth and data storage, also contributed to its growth.",reasoning,0.9999999999,0.0,0.6875,0.9583597742929056,0.7944593587522599 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,vector,"Amazon evolved from a small bookstore to a $48 billion retail giant by strategically shifting its corporate-level strategy towards mergers and acquisitions. This shift allowed Amazon to expand its business operations, diversify its offerings, and tap into new markets successfully. Additionally, Amazon leveraged technological progress, computational power, bandwidth, and data distribution centers to enhance its operations and scale up its business efficiently.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. 
Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 
4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. 
While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. 
Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \nDocument end\n']",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.9999999999,1.0,0.7,0.9534779033321638,0.47546527728430277 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,fulltext,"Amazon evolved from a small bookstore to a $48 billion retail giant by strategically shifting its corporate-level strategy towards mergers and acquisitions. This approach allowed Amazon to expand its business operations, diversify its offerings, and tap into new markets successfully. Additionally, leveraging technological progress in areas like computational power, bandwidth, and data storage played a crucial role in Amazon's transformation and growth over the years.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 0 BUS 478: Seminar on Business Strategy SYNOPSIS PROJECT: AMAZON.COM Group G Group Members: Tristan Landrecht Jessica Zhang John Chen Jin Can Chen Jae Kim \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. 
Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n']",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.9999999999,1.0,0.7142857142857143,0.9534779033321638,0.9758717973852911 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,entity search+vector,"Amazon evolved from a small bookstore to a $48 billion retail giant through strategic shifts such as diversification into various product categories, embracing technological advancements like cloud services and Kindle products, investing in distribution centers to support global expansion, and focusing on capturing emerging markets like China with growing disposable income. Additionally, Amazon's founder, Jeff Bezos, played a pivotal role in the company's development and expansion by driving innovation and customer-centric strategies.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. 
Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \ncommunities:\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. 
East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n- Electronic_Commerce_Firms \n- Concept:E-Commerce \n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Online_Shopping \n- Activity:Online Shopping \n- Online_Purchasing \nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon COMPETITOR Wal-Mart\n- Amazon COMPETES_WITH Wal-Mart\n- Amazon BENEFIT Online Shopping\n- Barnes And Noble COMPETES_WITH Amazon\n- E-Commerce ENABLES Online Shopping\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Country:India Emerging country with large population and growing disposable income.\n - Technology:Elastic Block Storage (Ebs) \n - Country:China Emerging country with large population and growing disposable income.\n - Product:Video Content \n - Technology:Cloud \n - Organization:Federal Trade Commission \n - Product:Kindle \n - Location:Distribution Centers \n - Organization:International Consumer Protection And Enforcement Network \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon OPPORTUNITY Country:India\n - 
Organization:Amazon USES Technology:Elastic Block Storage (Ebs)\n - Organization:Amazon OPPORTUNITY Country:China\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon INVESTS_IN Technology:Cloud\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:Amazon INVESTS_IN Product:Kindle\n - Organization:Amazon INVESTS_IN Location:Distribution Centers\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n\nDocument end\n""]",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.0,0.0,0.9,0.9534664131070926,0.8910963069767317 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,graph+vector,"Amazon evolved from a small bookstore to a $48 billion retail giant by strategically shifting its corporate-level strategy towards mergers and acquisitions. This shift allowed Amazon to expand its business beyond books and transform into a diverse retail platform offering products in over forty categories. Additionally, leveraging technological progress in computational power, bandwidth, and data storage enabled Amazon to scale its operations and become a significant player in the e-commerce industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH 
Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage 
(Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n']",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.9999999999,1.0,1.0,0.9534664131070926,0.9737308895318777 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,graph+vector+fulltext,"Amazon evolved from a small bookstore to a $48 billion retail giant by strategically shifting its focus from being just an online bookstore to becoming a one-stop shop for almost anything online. Initially slow growth was overcome by leveraging the momentum from the dot-com bubble. Amazon transformed its corporate-level strategy into mergers and acquisitions, expanding its offerings and services. 
This shift allowed Amazon to capitalize on technological progress, invest in infrastructure, and enhance its core competencies to capture a significant share of the global market.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage 
(Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. 
They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH 
Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n']",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.9999999999,1.0,0.6,0.953534996984493,0.4557118485937385 +What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?,global search+vector+fulltext,"Amazon's strategic shift from a small online bookstore to a $48 billion retail giant was primarily driven by its focus on strategic 
business planning, leadership, and continuous innovation. Jeff Bezos, the founder of Amazon, played a pivotal role in this transformation by emphasizing customer-centric strategies, diversifying product offerings across more than forty categories, and leveraging the growth opportunities presented during the Dot-Com Bubble. Amazon's ability to adapt, expand its product range, invest in technology, and prioritize customer experience were key factors in its remarkable evolution and success.","[""Document start\nThis Document belongs to the source unknown\nContent: The community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. 
Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. 
Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. 
Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]",Amazon transformed their corporate-level strategy into a mergers and acquisitions approach.,reasoning,0.0,0.0,0.6153846153846154,0.9564108581225913,0.7824061935944068 +"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by competition?",vector,"Amazon's mergers and acquisitions (M&A) strategy evolved as a response to competition in the e-commerce industry. Initially known as an online bookstore, Amazon faced intense rivalry from competitors like eBay, Barnes and Noble, and Wal-Mart. To stay competitive, Amazon expanded its business model to become a multi-category platform offering a wide range of products. This strategic shift was influenced by the need to differentiate itself and maintain a competitive edge in the market. 
By diversifying its offerings through acquisitions and mergers, Amazon aimed to enhance its competitive position and cater to a broader customer base beyond just books.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. 
Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. 
Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n']","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.5555555555555556,0.9315804599105225,0.8393456041533853 +"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by competition?",fulltext,"Amazon's mergers and acquisitions (M&A) strategy, transitioning from an online bookstore to a multi-category platform, has been significantly influenced by competition. As Amazon faced intense rivalry among competing firms, especially from major competitors like eBay, Barnes and Noble, and Wal-Mart, it strategically expanded its offerings through acquisitions to stay competitive. 
By diversifying into various categories beyond books, Amazon aimed to differentiate itself and maintain its market position amidst the high competition in the e-commerce industry. This strategic shift allowed Amazon to leverage its core competencies and adapt to changing consumer preferences, ultimately driving its growth and success as a multi-category platform.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. 
Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. 
Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n']","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.5454545454545454,0.9308140201045719,0.4373952418942165 +"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by competition?",entity search+vector,"Amazon's M&A strategy, evolving from an online bookstore to a diverse multi-category platform, has been significantly influenced by competition. Competitors like eBay, Barnes and Noble, and Wal-Mart have played a crucial role in shaping Amazon's expansion. 
The competitive landscape has driven Amazon to diversify its offerings, invest in new technologies like cloud services and video content, and expand into emerging markets like China and India. This strategic response to competition has propelled Amazon's growth and transformation into a retail giant across various product categories.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. 
Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. 
Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. 
Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Amazon Cooperation Treaty Organization \n- Company:Amazon Aws \n- Electronic_Commerce_Firms \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Concept:Bargaining_Power_Of_Suppliers Low due to the substantial number of suppliers and the vast online global distribution network.\n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Concept:E-Commerce \n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Global_Market \n- Online_Purchasing \nrelationships:\n- Amazon CAPTURES Global_Market\noutside:\n nodes:\n - Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of 
business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Activity:Online Shopping \n - Country:India Emerging country with large population and growing disposable income.\n - Organization:Federal Trade Commission \n - Organization:International Consumer Protection And Enforcement Network \n - Organization:Federal_Trade_Commission Regulates online advertising and the security of consumers’ personal information in the U.S.\n - Country:China Emerging country with large population and growing disposable income.\n - Organization:Neo4J A graph database management system.\n relationships:\n - Organization:Amazon COMPETITOR Organization:Barnes And Noble\n - Organization:Barnes And Noble COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Barnes And Noble\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Concept:E-Commerce ENABLES Activity:Online Shopping\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n - Organization:Federal_Trade_Commission REGULATES Electronic_Commerce_Firms\n - Organization:Amazon OPPORTUNITY Country:China\n - Company:Amazon Aws CLOUD_PROVIDER Organization:Neo4J\n\nDocument end\n""]","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.25,0.9415222169452915,0.5234713306082578 +"How has Amazon's M and A strategy, 
transitioning from an online bookstore to a multi-category platform, been shaped by competition?",graph+vector,"Amazon's mergers and acquisitions (M&A) strategy, transitioning from an online bookstore to a multi-category platform, has been significantly influenced by competition. As Amazon faced intense rivalry among competing firms, especially from major competitors like eBay, Barnes and Noble, and Wal-Mart, it strategically expanded its offerings through acquisitions to stay competitive. By acquiring companies in various categories, Amazon diversified its product range, enhanced its market presence, and strengthened its competitive position in the face of substitutes and increasing competition in the e-commerce industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. 
However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. 
Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n']","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.375,0.9380030186858774,0.522217312685086 +"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by 
competition?",graph+vector+fulltext,"Amazon's mergers and acquisitions (M&A) strategy, transitioning from an online bookstore to a multi-category platform, has been significantly influenced by competition. As Amazon faced intense rivalry among competing firms, especially from major competitors like eBay, Barnes and Noble, and Wal-Mart, it strategically expanded its offerings through acquisitions to stay competitive. By diversifying into various categories beyond books, Amazon aimed to differentiate itself and maintain its market position. This strategic shift allowed Amazon to leverage its core competencies, adapt to changing consumer preferences, and enhance its competitive advantage in the evolving e-commerce landscape.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. 
Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. 
Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n']","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.6363636363636364,0.9309101016692881,0.43721641843356046 +"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by 
competition?",global search+vector+fulltext,"Amazon's mergers and acquisitions (M&A) strategy, evolving from an online bookstore to a diverse multi-category platform, has been significantly influenced by competition. The competitive landscape with major players like eBay, Barnes and Noble, and Wal-Mart has prompted Amazon to strategically expand its offerings through acquisitions to stay ahead. By observing competitors' strategies, Amazon has diversified its product range, invested in technologies like Elastic Block Storage (EBS) and cloud services, and expanded into emerging markets like China. This competitive environment has driven Amazon to innovate, acquire new capabilities, and enhance its market presence to effectively compete and lead in the e-commerce industry.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. 
East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. 
This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. 
Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nT. Albert is associated with Maaillustrations.Com, an organization, through an illustrative collaboration.\n----\nJin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all members of the course Bus 478, which is a seminar on business strategy. They are part of a group within this course, indicating a collaborative or team-based component to the class.\nDocument end\n""]","The context does not provide specific information on how Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, has been shaped by competition.",multi_context,0.0,1.0,0.5384615384615384,0.9379910810877466,0.9091835548929823 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,vector,"Amazon's book competitors, such as Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by offering books not readily available at a lower price. They aim to increase profitability, gain market share, and provide a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. In comparison, Amazon utilizes a broad corporate-level strategy that includes mergers and acquisitions, transforming into a one-stop online retail destination. Amazon focuses on low prices, convenience, selection, and availability, leveraging technological progress to cater to a wide range of products beyond books.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. 
Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. 
As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. 
As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \nDocument end\n']","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. 
They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.7142857142857143,0.9375,0.9328662796416046,0.7306795185463123 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,fulltext,"Amazon's book competitors, such as Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by offering books not readily available at a lower price. They aim to increase profitability, gain market share, and provide a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. In comparison, Amazon focuses on a broader market strategy, aiming to be a one-stop-shop for all online purchases, not just limited to books. Amazon's competitive advantage lies in its vast selection, convenience, and customer service quality rather than just pricing or product differentiation in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.7142857142857143,0.9285714285714286,0.9328848633149723,0.7158051886779939 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,entity search+vector,"Amazon's book competitors, like Barnes and Noble and eBay, employ various strategies to compete for market share. 
Barnes and Noble focuses on physical retail stores, offering a personalized in-store experience and a wide selection of books. On the other hand, eBay emphasizes its online marketplace model, enabling individuals to buy and sell books directly. + +In comparison to Amazon, Barnes and Noble and eBay face challenges due to Amazon's extensive product range, competitive pricing, and efficient online platform. Amazon's ability to offer a vast selection of books, competitive prices, and convenient shopping experience has positioned it as a dominant force in the market, making it challenging for competitors to match its scale and customer reach.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. 
This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. 
It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Company:Amazon Aws \n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Organization:Amazon Cooperation Treaty Organization \n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Concept:E-Commerce \n- Electronic_Commerce_Firms \n- Concept:Bargaining_Power_Of_Suppliers Low due to the substantial number of suppliers and the vast online global distribution network.\n- Group:Consumers \nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Barnes And Noble COMPETES_WITH Amazon\noutside:\n nodes:\n - Activity:Online Shopping \n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Date:1993 \n - Person:Jeff Benzos Founder of Amazon.com\n - Organization:International Consumer Protection And Enforcement Network \n - Organization:Federal Trade Commission \n - Course:Bus 478 Seminar on Business Strategy\n - Country:India Emerging country with large population and growing disposable income.\n - Event:Dot-Com Bubble \n relationships:\n - Organization:Amazon BENEFIT Activity:Online Shopping\n - Concept:E-Commerce ENABLES Activity:Online Shopping\n - Group:Consumers PREFERENCE Activity:Online Shopping\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH 
Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Company:Amazon.Com FOUNDED Date:1993\n - Company:Amazon.Com FOUNDER Person:Jeff Benzos\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Course:Bus 478 PROJECT Company:Amazon.Com\n - Organization:Amazon OPPORTUNITY Country:India\n - Company:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\n\nDocument end\n""]","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. 
They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.14285714285714285,0.4375,0.9700955094824905,0.6921551328857438 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,graph+vector,"Amazon's book competitors, such as eBay and Barnes and Noble, employ various strategies to compete for market share. eBay focuses on cost leadership by operating as an online auction house without physical inventories, while also offering a wide range of products and high-quality services. Barnes and Noble, on the other hand, utilizes an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing unique books at competitive prices. + +In comparison to Amazon, these competitors aim to increase profitability, gain market share, and offer a variety of products through competitive pricing, cost leadership, and differentiation strategies. While Amazon has a strong presence in the market due to its technological advancements, broad product categories, and global reach, its competitors like eBay and Barnes and Noble focus on specific niches and strategies to carve out their market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. 
Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. 
\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. 
As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n']","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.7142857142857143,1.0,0.9579425934098755,0.49139780263611665 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,graph+vector+fulltext,"Amazon's book competitors, such as eBay and Barnes and Noble, employ various strategies to compete for market share. eBay focuses on cost leadership by operating as an online auction house without physical inventory, while also offering a wide range of products and high-quality services. Barnes and Noble, on the other hand, utilizes an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing unique books at competitive prices. + +In comparison to Amazon, these competitors aim to increase profitability, gain market share, and offer a variety of products through competitive pricing, cost leadership, and differentiation strategies. While Amazon has a strong online presence and a diverse product range, its competitors like eBay and Barnes and Noble focus on specific niches and unique offerings to attract customers. 
Amazon's competitive advantage lies in its technological innovations, customer service quality, and convenience, while its competitors differentiate themselves through targeted strategies and product offerings.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. 
Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan 
Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. 
They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. 
Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n']","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.7142857142857143,1.0,0.9523668132157023,0.44704411382876297 +What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?,global search+vector+fulltext,"Amazon's book competitors employ various strategies to compete for market share. eBay utilizes both cost leadership and differentiation strategies, aiming to offer competitive pricing and a unique value proposition to customers. Barnes and Noble integrates a focused cost leadership/differentiation strategy, focusing on specific market segments while offering differentiated products and services. Wal-Mart, as the world's largest retail chain, primarily focuses on a cost leadership approach, leveraging its scale to provide competitive prices. + +In comparison, Amazon employs a mix of strategies, heavily investing in technologies like video content, cloud services, and its Kindle product. It leverages technologies such as Elastic Block Storage (EBS) and 1-Click Ordering to enhance operations and customer experience. 
Amazon's strategic focus on innovation, technology, and global market expansion sets it apart from its competitors, allowing it to maintain a strong position in the e-commerce landscape.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. 
Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. 
This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. 
These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]","Amazon's book competitors are pursuing an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. They believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. They offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. On the other hand, Amazon uses a huge amount of data storage to improve customer service and is planning to reduce investments in the Kindle and Cloud initiatives next year. 
They are also interested in long-term investments, such as video content and distribution centers.",multi_context,0.9999999999,0.14285714285714285,0.45454545454545453,0.9568093643875176,0.6372330251599038 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,vector,"ICPEN, the International Consumer Protection and Enforcement Network, addresses cross-border online consumer issues by finding ways to tackle problems in both goods and services transactions. This helps ensure fair benefits and mutual understanding among participants. For retailers like Amazon, it emphasizes the importance of disclosing online sales taxes accurately when advertising products internationally to avoid customer confusion due to varying tax rates in different countries.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. 
The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. 
The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,1.0,1.0,0.944883789738359,0.991201905586986 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,fulltext,"ICPEN, the International Consumer Protection and Enforcement Network, addresses cross-border online consumer issues by finding ways to tackle problems involving transactions in both goods and services. This helps ensure fair benefits and mutual understanding among participants. For retailers like Amazon, it emphasizes the importance of disclosing online sales taxes accurately when advertising products internationally to avoid customer confusion. 
This transparency is crucial as different sales tax regulations can impact customer trust and satisfaction, especially in the context of online shopping platforms.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. 
Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. 
Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. 
These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\nDocument end\n']",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,1.0,0.6666666666666666,0.9448215167722637,0.9928426564003855 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,entity search+vector,"ICPEN, the International Consumer Protection and Enforcement Network, addresses cross-border transactions to tackle consumer problems involving goods and services. By doing so, it ensures that participants equally benefit and reach mutual understanding. For retailers like Amazon, this means they need to be cautious about disclosing online sales taxes when advertising products internationally to avoid customer confusion due to varying tax rates. This transparency is crucial for maintaining consumer trust and compliance with international regulations.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. 
Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n- Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. 
These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. 
The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Icpen Finds ways to tackle consumer problems involving cross-border transactions.\n- Online_Purchasing \n- Concept:E-Commerce \n- Online_Shopping \n- Activity:Online Shopping \n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Organization:International Consumer Protection And Enforcement Network \n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Company:Amazon Aws \n- Online_Shopping_Experience \nrelationships:\n- E-Commerce ENABLES Online Shopping\n- Amazon BENEFIT Online Shopping\n- International Consumer Protection And Enforcement Network SUPPORTS Amazon\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Location:China_And_India Emerging countries with large populations and growing disposable income.\n - Concept:Modern Tech Support \n - Concept:Web Security \n - Concept:3D_Virtual_Technology Technology that will take online shopping to new heights.\n - Group:Baby Boomers \n - Technology:3D Virtual Technology \n - Group:Consumers \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Barnes And Noble\n - Organization:Barnes And Noble COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Barnes And Noble\n - Organization:Amazon 
COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Location:China_And_India OPPORTUNITY_FOR Online_Shopping\n - Concept:Modern Tech Support FACILITATES Activity:Online Shopping\n - Concept:Web Security FACILITATES Activity:Online Shopping\n - Concept:3D_Virtual_Technology ENHANCES Online_Shopping\n - Group:Baby Boomers LESS_INVOLVED Activity:Online Shopping\n - Technology:3D Virtual Technology FUTURE Activity:Online Shopping\n - Group:Consumers PREFERENCE Activity:Online Shopping\n\nDocument end\n""]",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,0.6666666666666666,0.8,0.8978613181525779,0.9922690115395673 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,graph+vector,"ICPEN, the International Consumer Protection and Enforcement Network, addresses cross-border transactions by finding ways to tackle consumer problems involving such transactions in both goods and services. This helps ensure that participants equally benefit and reach mutual understanding. For retailers like Amazon, this means they need to be cautious about disclosing online sales taxes when advertising products to other countries via online shopping platforms to avoid customer confusion. 
By adhering to these regulations, online retailers can maintain transparency and trust with their international customers.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. 
These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Online_Shopping_Experience\n:Smartphone_App\nActivity:Online Shopping\nConcept:3D_Virtual_Technology (Technology that will take online shopping to new heights.)\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:Bargaining_Power_Of_Suppliers (Low due to the substantial number of suppliers and the vast online global distribution network.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Modern Tech Support\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nConcept:Threat_Of_New_Entrants (Considered low due to low capital investment costs but high infrastructure and inventory requirements.)\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:China_And_India (Emerging countries with large populations and growing disposable income.)\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:3D_Virtual_Technology ENHANCES :Online_Shopping\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nLocation:China_And_India OPPORTUNITY_FOR :Online_Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR 
Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. 
In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Online_Shopping_Experience\n:Smartphone_App\nActivity:Online Shopping\nConcept:3D_Virtual_Technology (Technology that will take online shopping to new heights.)\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:Bargaining_Power_Of_Suppliers (Low due to the substantial number of suppliers and the vast online global distribution network.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Modern Tech Support\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nConcept:Threat_Of_New_Entrants (Considered low due to low capital investment costs but high infrastructure and inventory requirements.)\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:China_And_India (Emerging countries with large populations and growing disposable income.)\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:3D_Virtual_Technology ENHANCES :Online_Shopping\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nLocation:China_And_India OPPORTUNITY_FOR :Online_Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR 
Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n']",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,1.0,1.0,0.8978613181525779,0.9181702852415992 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,graph+vector+fulltext,"ICPEN, the International Consumer Protection and Enforcement Network, addresses cross-border transactions issues by finding ways to tackle consumer problems involving both goods and services. This helps ensure that participants equally benefit and reach mutual understanding. 
For retailers like Amazon, it emphasizes the importance of careful disclosure of online sales taxes when advertising products to other countries via online shopping platforms to avoid customer confusion. By addressing these cross-border consumer issues, ICPEN contributes to a more transparent and consumer-friendly online shopping environment, impacting retailers like Amazon by promoting fair practices and consumer trust.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. 
Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Online_Shopping_Experience\n:Smartphone_App\nActivity:Online Shopping\nConcept:3D_Virtual_Technology (Technology that will take online shopping to new heights.)\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:Bargaining_Power_Of_Suppliers (Low due to the substantial number of suppliers and the vast online global distribution network.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Modern Tech Support\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nConcept:Threat_Of_New_Entrants (Considered low due to low capital investment costs but high infrastructure and inventory requirements.)\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:China_And_India (Emerging countries with large populations and growing disposable income.)\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:3D_Virtual_Technology ENHANCES :Online_Shopping\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nLocation:China_And_India OPPORTUNITY_FOR :Online_Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR 
Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. 
In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \n----\n Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers. Demographic As the world’s population is aging rapidly, baby boomers are moving towards retirement so that they will have weaker purchasing power. Thus, generation X and Y have become companies’ target markets for product selling. These generations are technologically adept and tend to engage more in online purchasing. Based on a survey, 62 percent of Generation Y liked to purchase things online, as compared to 32 percent of those aged older than 50. Organizations \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. 
According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\nEntities:\n:Cost Leadership\n:Cross-Border_Transactions\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Online_Shopping\n:Online_Shopping_Experience\n:Smartphone_App\nActivity:Online Shopping\nConcept:3D_Virtual_Technology (Technology that will take online shopping to new heights.)\nConcept:Bargaining_Power_Of_Buyers (High because customers can choose among numerous online stores for the lowest price.)\nConcept:Bargaining_Power_Of_Suppliers (Low due to the substantial number of suppliers and the vast online global distribution network.)\nConcept:E-Commerce\nConcept:Electronic_Devices (Devices such as smartphones that deliver easy and convenient transaction processes.)\nConcept:Modern Tech Support\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nConcept:Threat_Of_New_Entrants (Considered low due to low capital investment costs but high infrastructure and inventory requirements.)\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nGroup:Baby Boomers\nGroup:Consumers\nGroup:Generation X\nGroup:Generation Y\nLocation:China_And_India (Emerging countries with large populations and growing disposable income.)\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nMarket:Emerging Markets\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Icpen (Finds ways to tackle consumer problems involving cross-border transactions.)\nOrganization:International Consumer Protection And Enforcement Network\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nProduct:Kindle\nTechnology:1-Click Ordering\nTechnology:3D Virtual Technology\nTechnology:Internet\nTechnology:Online Payment Methods\nTechnology:Smartphones\n----\nRelationships:\nConcept:3D_Virtual_Technology ENHANCES :Online_Shopping\nConcept:E-Commerce ENABLES Activity:Online Shopping\nConcept:Electronic_Devices FACILITATES :Online_Shopping\nConcept:Modern Tech Support FACILITATES Activity:Online Shopping\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nGroup:Baby Boomers LESS_INVOLVED Activity:Online Shopping\nGroup:Consumers PREFERENCE Activity:Online Shopping\nGroup:Generation X TARGET_MARKET Activity:Online Shopping\nGroup:Generation Y TARGET_MARKET Activity:Online Shopping\nLocation:China_And_India OPPORTUNITY_FOR :Online_Shopping\nMarket:Emerging Markets GROWTH Activity:Online Shopping\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR 
Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Icpen ADDRESSES :Cross-Border_Transactions\nOrganization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\nOrganization:Wal-Mart USES :Cost Leadership\nTechnology:3D Virtual Technology FUTURE Activity:Online Shopping\nTechnology:Internet SUPPORTS Activity:Online Shopping\nTechnology:Online Payment Methods ENHANCES Activity:Online Shopping\nTechnology:Smartphones FACILITATES Activity:Online Shopping\nDocument end\n']",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,1.0,0.75,0.9525686118288258,0.9902459810706218 +What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?,global search+vector+fulltext,"ICPEN addresses cross-border online consumer issues by developing and implementing effective solutions to enhance consumer protection and ensure fair practices in the global marketplace. 
These efforts aim to facilitate smoother and more secure cross-border consumer interactions. For retailers like Amazon, being supported by ICPEN can contribute to building trust with consumers, ensuring compliance with regulations, and promoting transparency in online transactions, ultimately benefiting their international operations and reputation.","[""Document start\nThis Document belongs to the source unknown\nContent: The International Consumer Protection and Enforcement Network (ICPEN) is a dedicated organization that aims to address and resolve consumer issues arising from cross-border transactions. By focusing on the complexities and challenges that consumers encounter when engaging in international transactions, ICPEN works to develop and implement effective solutions. The organization plays a crucial role in ensuring consumer protection and fostering trust in the global marketplace by tackling these cross-border challenges.\n----\nThe International Consumer Protection and Enforcement Network (ICPEN) is a dedicated organization that aims to address and resolve consumer issues arising from cross-border transactions. By focusing on the complexities and challenges that consumers encounter when engaging in international transactions, ICPEN works to develop and implement effective solutions. The organization's efforts are centered on enhancing consumer protection and ensuring fair practices in the global marketplace, thereby facilitating smoother and more secure cross-border consumer interactions.\n----\nThe International Consumer Protection and Enforcement Network (ICPEN) is a dedicated organization that aims to address and resolve consumer issues arising from cross-border transactions. By focusing on the complexities and challenges that consumers encounter when engaging in international transactions, ICPEN works to develop and implement effective solutions. 
The organization's efforts are centered on enhancing consumer protection and ensuring fair practices in the global marketplace, thereby facilitating smoother and more secure cross-border consumer interactions.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. 
Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe International Consumer Protection and Enforcement Network (ICPEN) is an organization focused on addressing consumer issues related to cross-border transactions. Through its efforts, ICPEN seeks to find effective solutions to tackle the challenges consumers face when engaging in transactions that span different countries.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]",ICPEN finds ways to tackle consumer problems involving cross-border transactions in both goods and services. This helps to ensure participants equally benefit and reach mutual understanding. 
Online retailers such as Amazon should be careful to disclose online sales taxes while advertising products to other countries via online shopping platforms because different sales taxes can cause confusion to customers.,multi_context,0.9999999999,0.6666666666666666,0.8,0.9075174635825679,0.6095251499369276 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",vector,"Amazon's book competitors, like Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. They achieve this by providing books that are not readily available at a lower price point. These competitors aim to increase profitability, gain market share, and offer a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. By focusing on providing unique and affordable books online, they aim to attract price-sensitive customers and compete effectively in the market against Amazon.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. 
Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. 
The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. 
For example, \nDocument end\n']","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,1.0,0.7272727272727273,0.9553378029712812,0.9058522877045622 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",fulltext,"Amazon's book competitors, such as Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. They aim to attract these customers by offering books that are not readily available at a lower price. These competitors focus on increasing profitability, gaining market share, and providing a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. By utilizing online shopping platforms and offering unique, affordable books, they aim to compete against Amazon and capture a larger market share in the book industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. 
Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \nDocument end\n']","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,1.0,0.8333333333333334,0.9553023290348591,0.995587268131287 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",entity search+vector,"Amazon's book competitors like Barnes and Noble employ strategies to attract price-sensitive customers and expand market share by leveraging online shopping and offering unique, affordable books. These competitors often utilize a combination of cost leadership and differentiation strategies. Cost leadership involves offering competitive prices to attract price-sensitive customers, while differentiation focuses on providing unique book selections or exclusive deals to stand out in the market. 
By combining these strategies, competitors aim to appeal to a wide range of customers seeking affordable yet distinctive book options in the online shopping landscape.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. 
Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. 
Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nrelationships:\n- Barnes And Noble COMPETES_WITH Amazon\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon BENEFIT Online Shopping\n- E-Commerce ENABLES Online Shopping\nentities:\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Activity:Online Shopping \n- Online_Shopping \n- Concept:E-Commerce \n- Company:Amazon Aws \n- Online_Purchasing \n- Online_Shopping_Experience \noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of 
business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Event:Dot-Com Bubble \n - Course:Bus 478 Seminar on Business Strategy\n - Person:Jeff Benzos Founder of Amazon.com\n - Organization:International Consumer Protection And Enforcement Network \n - Organization:Federal Trade Commission \n - Date:1993 \n - Country:India Emerging country with large population and growing disposable income.\n - Amount:$48 Billion \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Company:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\n - Course:Bus 478 PROJECT Company:Amazon.Com\n - Company:Amazon.Com FOUNDER Person:Jeff Benzos\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Company:Amazon.Com FOUNDED Date:1993\n - Organization:Amazon OPPORTUNITY Country:India\n - Company:Amazon.Com VALUATION Amount:$48 Billion\n\nDocument end\n""]","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. 
They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,0.5,0.25,0.9615121586090064,0.7877951865157651 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",graph+vector,"Amazon's book competitors, such as Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. They achieve this by offering books that are not readily available at a lower price point. These competitors aim to increase profitability, gain market share, and provide a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. By focusing on unique and affordable book offerings, along with leveraging online shopping platforms, they aim to attract price-sensitive customers and compete effectively in the market against Amazon.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. 
Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\n\nDocument end\n']","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. 
They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,1.0,0.8,0.9550482196460813,0.898103060703844 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",graph+vector+fulltext,"Amazon's book competitors, such as Barnes and Noble, employ an integrated focused cost leadership/differentiation strategy to target price-sensitive customers and expand their market share. They achieve this by offering books that are not commonly available at a lower price point. These competitors aim to increase profitability, gain market share, and provide a wide variety of products through competitive pricing, cost leadership, and differentiation strategies. By leveraging online shopping platforms and providing unique, affordable books, they aim to attract customers seeking cost-effective options and enhance their presence in the market.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. 
Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click 
Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. 
Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n tanked, many people chose to migrate their shopping onto an online platform. In 2011, online sales actually increased by 11 percent despite what people imagined. The reason this has \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. 
Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click 
Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n']","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. 
They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,1.0,1.0,0.9553023290348591,0.9175976729605386 +"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?",global search+vector+fulltext,"Amazon's book competitors employ various strategies to attract price-sensitive customers and expand their market share in the realm of online shopping. eBay, Barnes and Noble, and Wal-Mart utilize different approaches to cater to this segment: + +1. **eBay**: Utilizes both cost leadership and differentiation strategies to appeal to price-sensitive customers. It offers a wide range of products, including books, at competitive prices while also providing a unique platform for auctions and second-hand items. + +2. **Barnes and Noble**: Integrates a focused cost leadership/differentiation strategy to stand out in the market. By offering a mix of affordable books along with unique and high-quality selections, they target price-sensitive customers seeking value and variety. + +3. **Wal-Mart**: Focuses on a cost leadership approach, leveraging its vast retail chain to provide competitive pricing on books and other products. By emphasizing affordability and a wide selection, Wal-Mart aims to attract price-sensitive customers looking for budget-friendly options. + +These competitors aim to leverage online shopping platforms to reach a broader audience of price-sensitive customers while offering unique and affordable book selections to enhance their market share in the competitive e-commerce landscape.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. 
It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. 
It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. 
The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. 
East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. 
This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. 
This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. 
These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]","Amazon's competitors in the book industry use an integrated focused cost leadership/differentiation strategy to attract price-sensitive customers. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability and gain market share by offering a wide variety of products to compete against Amazon, utilizing competitive pricing, cost leadership, and differentiation strategies. They also believe that the demand for online shopping will continue to grow and offer similar products as Amazon either at a lower price or accompanied with differentiated services.",multi_context,0.9999999999,0.5,0.14285714285714285,0.967728078973633,0.8039721568085061 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",vector,"Amazon's book rivals, such as Barnes and Noble, are employing an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. They aim to offer books not readily available at a lower price to compete with Amazon. This strategy impacts Amazon's long-term investment focus by potentially influencing the need for margin expansion to prevent share price drops. While Amazon focuses on long-term investments like video content and distribution centers, the competition's pricing strategies may necessitate Amazon to balance its investments to remain competitive in the market.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. 
The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. 
Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. 
\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. 
East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n']","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,0.75,0.6363636363636364,0.9149586709793368,0.544215279154729 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",fulltext,"Amazon's book rivals, such as Barnes and Noble, are employing an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. 
They achieve this by offering books not readily available at a lower price. This strategy aims to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon. The impact on Amazon's long-term investment focus is that while Amazon is interested in long-term investments like video content and distribution centers, it faces challenges in predicting the realization of benefits from these investments due to the need for margin expansion to prevent share price drops.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. 
Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. 
As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\nDocument end\n']","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,1.0,0.9,0.9429061355024674,0.8209237046096777 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",entity search+vector,"Amazon's book rivals, like Barnes and Noble, are likely employing strategies to attract price-sensitive customers by offering competitive pricing, discounts, and promotions. To drive market growth, these rivals may focus on expanding their product offerings, enhancing customer experience, and investing in marketing campaigns. These strategies could impact Amazon's long-term investment focus by potentially prompting Amazon to adjust its pricing strategies, invest more in customer retention and acquisition, or explore new market segments to maintain its competitive edge in the evolving e-commerce landscape.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. 
For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \ncommunities:\n- Neo4j is a comprehensive graph database management system that offers a wide range of products, services, and resources to support graph data science and related applications. It provides various deployment options, including Cloud Managed Services, Self-Hosted, and Graph-As-A-Service, with Amazon AWS as a cloud provider. Neo4j's offerings include the Neo4j Graph Data Science product, Neo4j GraphQL Library, Neo4j Workspace, Neo4j Bloom visualization tool, Neo4j Developer Tools, and Neo4j Data Connectors, all of which are integral parts of the Neo4j ecosystem.\n\nThe organization supports learning and development through resources like Graphacademy, which offers free online courses and certifications, and the Neo4j Blog for daily insights. It also provides a Resource Library with white papers and data sheets, Case Studies showcasing customer success stories, and Executive Insights on graph technology.\n\nNeo4j hosts various events, including the Neo4j Events Hub for live and on-demand training, webinars, and demos, the quarterly online conference Connections, and the global Graphsummit tour. 
The Cypher query language is a key technology used within Neo4j for exploring and manipulating graph data.\n\nAdditionally, Neo4j includes a company structure with information about its culture, diversity, leadership, and partnerships, offering opportunities to find or become a partner through its Partner Portal.\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. 
The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Company:Amazon Aws \n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Concept:Bargaining_Power_Of_Buyers High because customers can choose among numerous online stores for the lowest price.\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Group:Consumers \n- Electronic_Commerce_Firms \n- Concept:E-Commerce \n- Organization:Amazon Cooperation Treaty Organization \n- Global_Market \nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon CAPTURES Global_Market\n- Barnes And Noble COMPETES_WITH Amazon\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Activity:Online Shopping \n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Organization:Federal Trade Commission \n - Country:India Emerging country with large population and growing disposable income.\n - Country:China Emerging country with large population and growing disposable income.\n - Technology:Cloud \n - Product:Video Content \n - Technology:Elastic Block Storage (Ebs) \n - Organization:International Consumer Protection And Enforcement Network \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon 
BENEFIT Activity:Online Shopping\n - Group:Consumers PREFERENCE Activity:Online Shopping\n - Concept:E-Commerce ENABLES Activity:Online Shopping\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:Amazon OPPORTUNITY Country:China\n - Organization:Amazon INVESTS_IN Technology:Cloud\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon USES Technology:Elastic Block Storage (Ebs)\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n\nDocument end\n""]","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,0.5,0.0,0.9504468591459928,0.7167896146801386 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",graph+vector,"Amazon's book rivals, such as Barnes and Noble, are employing an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. 
They achieve this by offering books not commonly available at a lower price. This strategy aims to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon. The impact of these strategies on Amazon's long-term investment focus is significant. Amazon is focusing on long-term investments like video content and distribution centers, but challenges arise due to the need for margin expansion to prevent share price drops. Despite these challenges, Amazon must continue investing in new revenue-generating projects to maintain competitiveness in the industry.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. 
Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. 
Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. 
The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit 
Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. 
Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n']","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,1.0,0.9090909090909091,0.8989058196332955,0.7933393991795576 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",graph+vector+fulltext,"Amazon's book rivals, such as Barnes and Noble, are employing an integrated focused cost leadership/differentiation strategy to target price-sensitive customers. They achieve this by offering books not commonly available at a lower price. This strategy aims to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon. The impact on Amazon's long-term investment focus is that while Amazon is interested in long-term investments like video content and distribution centers, it faces challenges in predicting the realization of benefits from these investments due to the need for margin expansion to prevent share price drops.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. 
Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. 
Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. 
East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. 
As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. 
Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. 
Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. 
As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Purchasing\n:Online_Sales_Taxes\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nDemographic_group:Generation_X_And_Y (Technologically adept generations that engage more in online purchasing.)\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nDemographic_group:Generation_X_And_Y ENGAGES_IN :Online_Purchasing\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER\nDocument end\n']","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,1.0,0.9,0.9429061355024674,0.7996236241211843 +"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?",global search+vector+fulltext,"eBay, Barnes and Noble, and Wal-Mart, as Amazon's book rivals, employ distinct strategies tailored for price-sensitive customers and market expansion. eBay combines cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart emphasizes cost leadership. These strategies influence how these competitors attract price-sensitive customers and drive market growth. Amazon, in response, focuses on long-term investments in technologies like video content, cloud services, and product innovation to maintain its competitive edge and global market presence amidst these strategic dynamics.","[""Document start\nThis Document belongs to the source unknown\nContent: The community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. 
Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. 
Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. 
E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. 
Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. 
This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nGeneration X and Y, recognized for their proficiency with technology, are demographic cohorts that frequently participate in online shopping activities. These groups are characterized by their comfort and familiarity with digital platforms, which facilitates their engagement in e-commerce. Their technological adeptness not only influences their purchasing habits but also reflects broader trends in consumer behavior, where digital literacy plays a crucial role in shaping market dynamics. As active participants in the online marketplace, Generation X and Y contribute significantly to the growth and evolution of e-commerce, highlighting the importance of targeting these demographics in digital marketing strategies.\n----\nThe online shopping landscape is undergoing a transformative enhancement through the integration of 3D virtual technology, which aims to significantly elevate the consumer experience. This technological advancement is complemented by the widespread use of electronic devices, particularly smartphones, which are streamlining and simplifying transaction processes for online shoppers. Furthermore, emerging markets, notably China and India, are poised to play a pivotal role in the expansion of online shopping. These countries, characterized by their vast populations and rising disposable incomes, offer substantial opportunities for growth and development in the online retail sector.\nDocument end\n""]","Amazon's book rivals are using an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. This intense competition impacts Amazon's long-term investment focus, as the company needs to keep investing in new projects that generate more revenue to reflect the value of the company, despite the increasing expenditures in some of its services.",multi_context,0.9999999999,0.25,0.5333333333333333,0.9003929990624776,0.7177808409752562 +What were the 2012 issues with Amazon's EBS at the U.S. East center and their impact on customer service and future investments?,vector,"In 2012, Amazon's Elastic Block Storage (EBS) service experienced multiple downtimes at the U.S. East data center in Virginia. These outages had a significant impact on customer service, causing inconvenience and discomfort for regular website users. As a result, Amazon faced challenges in maintaining customer satisfaction and reliability. These issues highlighted the importance of major upgrades to prevent future service disruptions and emphasized the need for strategic investments in infrastructure to enhance the resilience of their data centers and improve overall customer experience.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. 
Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. 
Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. 
When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. 
The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. 
As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. 
Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service at the U.S. East data center in Virginia experienced several outages due to an electric outage. This was a significant issue as this data center is the biggest and oldest one. These outages caused inconvenience and discomfort to regular users of the website. The company recognized the need for major upgrades to this data center to prevent future service disruptions. The issues also impacted future investments, with Amazon planning to reduce investments in initiatives like Kindle and Cloud.",multi_context,0.9999999999,1.0,0.5,0.932155520829277,0.868596089410279 +What were the 2012 issues with Amazon's EBS at the U.S. East center and their impact on customer service and future investments?,fulltext,"In 2012, Amazon's Elastic Block Storage (EBS) service experienced multiple downtimes at the U.S. East data center in Virginia. These outages had a significant impact on customer service, causing inconvenience and discomfort for regular website users. As a result, Amazon needed to make major upgrades to the U.S. East data center to prevent such service disruptions in the future. These issues highlighted the importance of investing in infrastructure to ensure reliable services and maintain customer satisfaction.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. 
In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. 
Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. 
The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. 
They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n since the existing major competitors, such as Amazon, have established a loyal customer base. Bargaining Power of Suppliers Suppliers have little bargaining power due to the substantial number of suppliers available and the vast expanse of the online global distribution network. Many suppliers rely on key online retailers such as Amazon to engage in bulk purchasing. Thus, online retailers can easily switch to another supplier for lower price and better quality. Bargaining power of suppliers to Amazon is particularly low since suppliers do not require payment until 35 days after the confirmation of sales. Bargaining Power of Buyers The bargaining power of buyers is high. 
Customers have the options to choose among numerous online stores for the lowest price products and services due to the completely available \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service at the U.S. East data center in Virginia experienced several outages due to an electric outage. This was a significant issue as this data center is the biggest and oldest one. These outages caused inconvenience and discomfort to regular users of the website. The company recognized the need for major upgrades to this data center to prevent future service disruptions. The issues also impacted future investments, with Amazon planning to reduce investments in initiatives like Kindle and Cloud.",multi_context,0.9999999999,1.0,0.5555555555555556,0.9321494024653637,0.8578405670500375 +What were the 2012 issues with Amazon's EBS at the U.S. 
East center and their impact on customer service and future investments?,graph+vector,"In 2012, Amazon's Elastic Block Storage (EBS) service at the U.S. East data center experienced multiple outages, impacting customer service reliability. These issues caused inconvenience and discomfort for customers relying on Amazon's services. As a result, Amazon needed to make major upgrades to the U.S. East data center to prevent future service disruptions. These incidents highlighted the importance of investing in infrastructure to ensure uninterrupted service delivery and maintain customer satisfaction.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. 
Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Online_Shopping_Experience\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. 
Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. 
The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. 
The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Online_Shopping_Experience\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service at the U.S. East data center in Virginia experienced several outages due to an electric outage. This was a significant issue as this data center is the biggest and oldest one. These outages caused inconvenience and discomfort to regular users of the website. The company recognized the need for major upgrades to this data center to prevent future service disruptions. 
The issues also impacted future investments, with Amazon planning to reduce investments in initiatives like Kindle and Cloud.",multi_context,0.9999999999,1.0,0.7777777777777778,0.9339821431522823,0.8298531266923304 +What were the 2012 issues with Amazon's EBS at the U.S. East center and their impact on customer service and future investments?,graph+vector+fulltext,"In 2012, Amazon faced several downtimes with its Elastic Block Storage (EBS) service at the U.S. East data center, leading to disruptions in customer service. These outages caused inconvenience and discomfort for customers who regularly used the website. As a result, Amazon needed to make major upgrades to the U.S. East data center to prevent such service interruptions in the future. These incidents highlighted the importance of investing in infrastructure to ensure reliable services and maintain customer satisfaction.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. 
While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. 
As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. 
The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. 
Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Online_Shopping_Experience\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 7 electric outage. The main problem comes from the Amazon’s U.S. East data center in Virginia, which is the biggest and the oldest data center that has been causing outage issues. The company needs to do major upgrades on the U.S. East data center to prevent the services from going down in the future. 
Otherwise, the server crashes can bring inconvenience and discomfort to the customers who use the website regularly. Some of Amazon’s services such as Kindle and Cloud have been increasing expenditures throughout the company. Amazon is planning to reduce investments in the Kindle and Cloud initiatives next year. In order to remain competitive in the industry, Amazon should keep investing in new projects that generate more revenue to reflect the value of the company. While Amazon’s main interest is in long-term investments, such as video content and distribution centers, it is difficult to predict when the benefits from these investments will be realized\n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. 
Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. 
The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. 
They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. 
\n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Online_Shopping_Experience\n:Smartphone_App\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nConcept:Online_Payment_Methods (Methods such as online banking and PayPal that create convenience in purchase transactions.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nConcept:Online_Payment_Methods ENHANCES :Online_Shopping_Experience\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH 
Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nDocument end\n']","In 2012, Amazon's Elastic Block Storage (EBS) service at the U.S. East data center in Virginia experienced several outages due to an electric outage. This was a significant issue as this data center is the biggest and oldest one. These outages caused inconvenience and discomfort to regular users of the website. The company recognized the need for major upgrades to this data center to prevent future service disruptions. 
The issues also impacted future investments, with Amazon planning to reduce investments in initiatives like Kindle and Cloud.",multi_context,0.9999999999,1.0,0.7777777777777778,0.9227835889993118,0.8289740705888251 +How do rivals target cost-conscious readers and compete with Amazon?,vector,"Rivals target cost-conscious readers and compete with Amazon by employing strategies like focused cost leadership and differentiation. For example, Barnes and Noble, a major competitor, uses an integrated focused cost leadership/differentiation strategy to offer books at competitive prices and provide unique selections not easily found elsewhere. This approach aims to attract price-sensitive consumers while providing value through product variety and quality service.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. 
Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. 
Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. 
External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,1.0,1.0,0.9855041512534686,0.721496872867419 +How do rivals target cost-conscious readers and compete with Amazon?,fulltext,"Rivals target cost-conscious readers and compete with Amazon by employing strategies like focused cost leadership and differentiation. They aim to offer products not commonly available at lower prices, utilize competitive pricing, and provide differentiated services. By focusing on cost-effective approaches, providing a wide variety of products, and enhancing customer service, competitors strive to attract and retain cost-conscious readers in the market while challenging Amazon's market share.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. 
This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. 
Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. 
Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. 
It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: \t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. 
If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\nDocument end\n']","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 
Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,1.0,0.8181818181818182,0.9742459416659807,0.6190315441384282 +How do rivals target cost-conscious readers and compete with Amazon?,entity search+vector,"Rivals target cost-conscious readers and compete with Amazon by offering competitive pricing, deals, and discounts to attract price-sensitive customers. They may also focus on providing value-added services, unique product selections, and efficient delivery options to differentiate themselves from Amazon. Additionally, rivals can leverage their brand reputation, customer service, and loyalty programs to build customer trust and loyalty in the face of Amazon's dominance in the market.","[""Document start\nThis Document belongs to the source unknown\nContent: chunks:\n- providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n- \t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. 
Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n- \t\r \xa0 3 online payment methods such as online banking and PayPal create more convenience in purchase transactions, which in turn enhances customers’ online shopping experience. Also, the invention of electronic devices such as smartphones deliver easy and convenient transaction processes, further facilitating online shopping. For instance, Amazon recently released a smartphone app for their Kindle services, allowing users to conveniently access the online sales platform and review products before purchasing. In the future, the use of 3D virtual technology to market products will also take online shopping to new heights. Political/Legal In the modern day environment, firms must be cautious when they deal with international policies on online distribution. In the U.S., electronic commerce firms are regulated by the Federal Trade Commission, which regulates online advertising and the security of consumers’ personal information. 
On the other hand, International Consumer Protection and Enforcement Network (ICPEN) finds ways to tackle consumer problems involving cross-border transactions \ncommunities:\n- Amazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n- Amazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. 
Additionally, it receives support from the International Consumer Protection and Enforcement Network.\nentities:\n- Organization:Amazon Fastest-growing, multinational e-commerce retailer in America with international presence.\n- Company:Amazon.Com Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.\n- Organization:Barnes And Noble Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n- Group:Consumers \n- Online_Shopping \n- Concept:E-Commerce \n- Product:Kindle \n- Activity:Online Shopping \n- Online_Purchasing \n- Online_Shopping_Experience \nrelationships:\n- Amazon COMPETITOR Barnes And Noble\n- Amazon COMPETES_WITH Barnes And Noble\n- Amazon INVESTS_IN Kindle\n- Amazon BENEFIT Online Shopping\n- Barnes And Noble COMPETES_WITH Amazon\n- Consumers PREFERENCE Online Shopping\n- E-Commerce ENABLES Online Shopping\noutside:\n nodes:\n - Organization:Ebay Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.\n - Organization:Wal-Mart World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.\n - Country:China Emerging country with large population and growing disposable income.\n - Location:Distribution Centers \n - Technology:Cloud \n - Product:Video Content \n - Country:India Emerging country with large population and growing disposable income.\n - Technology:Elastic Block Storage (Ebs) \n - Organization:Federal Trade Commission \n - Organization:International Consumer Protection And Enforcement Network \n relationships:\n - Organization:Amazon COMPETITOR Organization:Ebay\n - Organization:Ebay COMPETES_WITH Organization:Amazon\n - Organization:Amazon COMPETES_WITH Organization:Ebay\n - Organization:Amazon COMPETITOR Organization:Wal-Mart\n - Organization:Amazon COMPETES_WITH Organization:Wal-Mart\n - Organization:Amazon 
OPPORTUNITY Country:China\n - Organization:Amazon INVESTS_IN Location:Distribution Centers\n - Organization:Amazon INVESTS_IN Technology:Cloud\n - Organization:Amazon INVESTS_IN Product:Video Content\n - Organization:Amazon OPPORTUNITY Country:India\n - Organization:Amazon USES Technology:Elastic Block Storage (Ebs)\n - Organization:Federal Trade Commission REGULATES Organization:Amazon\n - Organization:International Consumer Protection And Enforcement Network SUPPORTS Organization:Amazon\n\nDocument end\n""]","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,0.25,0.0,0.9854858394471804,0.7730167298767534 +How do rivals target cost-conscious readers and compete with Amazon?,graph+vector,"Rivals targeting cost-conscious readers and competing with Amazon often employ strategies like cost leadership and differentiation. For instance, Barnes and Noble, a major competitor, uses an integrated focused cost leadership/differentiation strategy. They target price-sensitive consumers by offering books not commonly available at a lower price. 
This approach allows them to compete effectively with Amazon in the book industry by appealing to cost-conscious readers while providing unique offerings.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. 
The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. 
These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. 
However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. 
Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. 
In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage 
(Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. 
East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. 
They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. 
There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n both e-commerce and physical commerce. 
2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. 
There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. 
Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH 
Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon 1.pdf\nContent: MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER 
Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n']","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,1.0,1.0,0.9743047031882105,0.6700624112374851 +How do rivals target cost-conscious readers and compete with Amazon?,graph+vector+fulltext,"Rivals target cost-conscious readers and compete with Amazon by employing strategies like integrated focused cost leadership/differentiation. They aim to offer products not commonly available at lower prices, appealing to price-sensitive consumers. Competitors focus on increasing profitability, gaining market share, and providing a wide variety of products through competitive pricing, cost leadership, and differentiation strategies to challenge Amazon's market dominance.","['Document start\nThis Document belongs to the source About Amazon 1.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. 
Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. 
It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. Sociocultural Online shopping has grown dramatically in popularity over recent years. 
E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. While firms expand internationally, it is critical to take into account of the differences in consumer preferences. 
Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. 
It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH 
Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: Text Content:\n\t\r \xa0 1 Company Background When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble. 
As time went on, Amazon became widely known as the place to shop for books, and the self proclaimed: “world’s largest bookstore”. As time progressed, so did Amazon’s business strategy. Amazon transformed their corporate-level strategy into a mergers and acquisitions approach and did this with great success. Amazon’s goal today is to be a place a customer can go to purchase anything online. Amazon has transformed from a garage-based bookstore into a $48 billion dollar retail machine. They have been able to do this thanks to the power of technological progress. Computational power, bandwidth and data \n----\n both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. 3 Assumptions: Competitors believe that the demand for online shopping will continue to grow as consumers seek for more convenient and efficient ways of shopping. 4 Capabilities: Competitors offer similar products as Amazon either at a lower price or accompanied with differentiated services. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers. Current Situations & Challenges Amazon uses huge amount of data storage to improve customer service. However, Amazon’s Elastic Block Storage (EBS) service has been down several times in 2012 due to an \n----\n\t\r \xa0 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a variety of products, high-quality service security, and E-commerce services. 
Barnes and Noble, America’s largest book retailer operating both online and physical retail businesses, competes intensively with Amazon for the market share in the book industry. It pursues an integrated focused cost leadership/differentiation strategy to target price-sensitive consumers by providing books not readily or commonly available at a less expensive price. The competitors’ analysis can be summarized as below: 1 Future objectives: Competitors want to compete for a good strategic position and become the market leader in both e-commerce and physical commerce. 2 Current strategy: Competitors aim to increase profitability,\n----\n\t\r \xa0 5 information online. The fact that Amazon is able to reduce their overhead costs by not having retail locations allows them to achieve these low prices, enabling them to compete in the industry. Threat of Substitute Products The threat of substitutes for Amazon is high. The only unique characteristic Amazon has is the patented technology (such as 1-Click Ordering), which differentiates them from other possible substitutes. However, there are many alternatives providing the same products and services, which could reduce Amazon’s competitive advantage. Therefore, Amazon does not have absolute competitive advantage on their product offerings, but they definitely have the advantage when it comes to the quality of customer service and convenience provided. Rivalry Among Competing Firms Rivalry among competing firms is high. There are a vast number of search engines on the Internet which are able to influence customers decisions when searching for the best online retailer. There is also an increasing number of dot\n----\n\t\r \xa0 2 been occurring is that many consumers are changing their spending habits toward finding “bargain goods”. Amazon has been able to benefit from this greatly thanks to their ability to offer affordable goods quickly and easily to the general public. 
Sociocultural Online shopping has grown dramatically in popularity over recent years. E-commerce is convenient since there are no geographic restrictions and consumers can have access to a selection of goods wherever and whenever they want. Moreover, there is an unlimited selection of merchandise for customers to review and compare. These advantages shift customers’ shopping behavior from retail stores to online shopping. According to a survey of online shoppers: 48 percent of respondents shopped online in the past 12 months, 66 percent preferred web retailers, and 73 percent completed nearly half of their shopping online. The increasing popularity of online shopping is providing a foundation for Amazon to exploit their core competencies. Global With the advances in\n----\n content and distribution centers, it is difficult to predict when the benefits from these investments will be realized. Amazon has stopped growing for a while and their margin has been constant for several years. The company needs a margin expansion in order to prevent the company’s share price from dropping. Future Outlook Overall, there are remarkable challenges for Amazon to yet overcome. If they are able to swiftly react they will be able to maintain their competitive advantages through harnessing their core competencies. Continued success in the emerging markets will ensure that Amazon has a bright and better tomorrow. \n----\n providing a foundation for Amazon to exploit their core competencies. Global With the advances in modern tech support and web security, people are getting more and more willing to make purchases online. As the economies of emerging markets are rising dramatically, people in those countries with increasing purchasing power are spending more on online shopping. Services such as Amazon are able to exploit this opportunity by offering consumers goods which are not readily available in local markets. 
While firms expand internationally, it is critical to take into account of the differences in consumer preferences. Technological The Internet is an excellent source of data that provides the most timely available information that captures the shift in consumer preferences and trends. As technology has been rapidly advancing, businesses have been influenced by technological innovations. For example, \n----\n influence customers decisions when searching for the best online retailer. There is also an increasing number of dot-com retailers due to the relatively low start-up costs of the business. Competitor Analysis As the fastest-growing, multinational e-commerce retailer in America, Amazon has an international presence over various categories and faces an intense competition. Based on the similarity in the breadth of products, geographic market coverage, and scope of business, three major competitors are identified: eBay, Barnes and Noble, and Wal-Mart. Wal-Mart, as the world’s largest retail chain, utilizes a cost leadership strategy with low cost management in value chain to market its products at the lowest price through both e- commerce and physical stores. \n----\n\t\r \xa0 4 should also exploit opportunities from emerging countries, such as China and India, which have large populations and growing disposable income. Based on a global survey of online shoppers, people in China engaged in online shopping 8.4 times more than those in any other markets on a monthly basis. Thanks to the network of distribution services and the power of E-Commerce, Amazon is in a position to utilize its core competencies in order to capture the global market. Industry Analysis Threat of New Entrants The threat of new entrants is considered to be low. It is easy for companies to start-up in this industry due to the relatively low capital investment costs. 
However, to achieve the position in the industry that Amazon holds it would require huge investment in infrastructure and inventory. It is also hard for new entrants to gain customer loyalty since the existing major competitors, such as Amazon, have established a loyal customer base. \n----\n to do this thanks to the power of technological progress. Computational power, bandwidth and data storage technologies all have improved and have become relatively cheaper over time. In the present day, Amazon sells products in over forty categories. They have everything from books to electronics, to groceries. Today Amazon is a logistics platform, a search engine, an Internet advertising platform, as well as fulfilling roles as an e-commerce and IT platform. The key areas of focus for Amazon are low prices, convenience, selection, and availability. External Analysis General Environment Economic Since the start of 2008, the global economy has been in a recession. The World Bank reduced each country’s expected GDP growth for the year of 2013. The low GDP growth and high unemployment rate have reduced people’s wealth and purchasing power of goods. When the market tanked, many people chose to migrate their shopping onto an online platform. In 2011, online\n----\nEntities:\n:Cost Leadership\n:Differentiation\n:Global_Market\n:Integrated Focused Cost Leadership/Differentiation\n:Online_Sales_Taxes\n:Smartphone_App\nActivity:Online Shopping\nAmount:$48 Billion\nCompany:Amazon.Com (Founded in 1993 by Jeff Benzos, initially an online bookstore, now a $48 billion retail machine selling products in over forty categories.)\nCountry:Afghanistan\nCountry:China (Emerging country with large population and growing disposable income.)\nCountry:India (Emerging country with large population and growing disposable income.)\nCourse:Bus 478 (Seminar on Business Strategy)\nDate:1993\nEvent:Dot-Com Bubble\nLocation:Distribution Centers\nLocation:U.S. 
East Data Center\nOrganization:Amazon (Fastest-growing, multinational e-commerce retailer in America with international presence.)\nOrganization:Barnes And Noble (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Ebay (Major competitor of Amazon based on product breadth, geographic market coverage, and scope of business.)\nOrganization:Florida Panthers (Reigning league champions, winning the 2024 Stanley Cup Finals.)\nOrganization:Wal-Mart (World’s largest retail chain utilizing a cost leadership strategy with low cost management in value chain.)\nPerson:Jae Kim\nPerson:Jeff Benzos (Founder of Amazon.com)\nPerson:Jessica Zhang\nPerson:Jin Can Chen\nPerson:John Chen\nPerson:Mir Osman Ali Khan (The seventh Nizam who signed an instrument of accession, joining India.)\nPerson:Nizam (Ruler of Hyderabad State from 1724 to 1948, initially a viceroy of the Mughal empire in the Deccan.)\nPerson:Rinku Singh\nPerson:Rohit Sharma\nPerson:Sanju Samson\nPerson:Shivam Dube\nPerson:Tristan Landrecht\nPerson:Virat Kohli\nPerson:Yashasvi Jaiswal\nProduct:Kindle\nProduct:Video Content\nTechnology:1-Click Ordering\nTechnology:Cloud\nTechnology:Elastic Block Storage (Ebs)\nTechnology:Internet\n----\nRelationships:\nCompany:Amazon.Com FOUNDED Date:1993\nCompany:Amazon.Com FOUNDER Person:Jeff Benzos\nCompany:Amazon.Com GROWTH_MOMENTUM Event:Dot-Com Bubble\nCompany:Amazon.Com VALUATION Amount:$48 Billion\nCountry:India MATCH Country:Afghanistan\nCourse:Bus 478 GROUP_MEMBER Person:Jae Kim\nCourse:Bus 478 GROUP_MEMBER Person:Jessica Zhang\nCourse:Bus 478 GROUP_MEMBER Person:Jin Can Chen\nCourse:Bus 478 GROUP_MEMBER Person:John Chen\nCourse:Bus 478 GROUP_MEMBER Person:Tristan Landrecht\nCourse:Bus 478 PROJECT Company:Amazon.Com\nOrganization:Amazon CAPTURES :Global_Market\nOrganization:Amazon COMPETES_WITH Organization:Barnes And Noble\nOrganization:Amazon COMPETES_WITH Organization:Ebay\nOrganization:Amazon COMPETES_WITH 
Organization:Wal-Mart\nOrganization:Amazon COMPETITOR Organization:Barnes And Noble\nOrganization:Amazon COMPETITOR Organization:Ebay\nOrganization:Amazon COMPETITOR Organization:Wal-Mart\nOrganization:Amazon DISCLOSES :Online_Sales_Taxes\nOrganization:Amazon INVESTS_IN Location:Distribution Centers\nOrganization:Amazon INVESTS_IN Product:Kindle\nOrganization:Amazon INVESTS_IN Product:Video Content\nOrganization:Amazon INVESTS_IN Technology:Cloud\nOrganization:Amazon LOCATED_AT Location:U.S. East Data Center\nOrganization:Amazon OPPORTUNITY Country:China\nOrganization:Amazon RELEASED :Smartphone_App\nOrganization:Amazon USES Technology:1-Click Ordering\nOrganization:Amazon USES Technology:Elastic Block Storage (Ebs)\nOrganization:Barnes And Noble COMPETES_WITH Organization:Amazon\nOrganization:Barnes And Noble USES :Integrated Focused Cost Leadership/Differentiation\nOrganization:Ebay COMPETES_WITH Organization:Amazon\nOrganization:Ebay USES :Cost Leadership\nOrganization:Ebay USES :Differentiation\nOrganization:Florida Panthers ESTABLISHED Date:1993\nOrganization:Wal-Mart USES :Cost Leadership\nPerson:Jae Kim MEMBER Course:Bus 478\nPerson:Jessica Zhang MEMBER Course:Bus 478\nPerson:Jin Can Chen MEMBER Course:Bus 478\nPerson:John Chen MEMBER Course:Bus 478\nPerson:Mir Osman Ali Khan SIGNED_ACCESSION Country:India\nPerson:Nizam JOINED Country:India\nPerson:Rinku Singh PLAYER Country:India\nPerson:Rohit Sharma PLAYER Country:India\nPerson:Sanju Samson PLAYER Country:India\nPerson:Shivam Dube PLAYER Country:India\nPerson:Tristan Landrecht\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon 1.pdf\nContent: MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n\n\nDocument start\nThis Document belongs to the source About Amazon.pdf\nContent: MEMBER Course:Bus 478\nPerson:Virat Kohli PLAYER 
Country:India\nPerson:Yashasvi Jaiswal PLAYER Country:India\nTechnology:Internet SUPPORTS Activity:Online Shopping\nDocument end\n']","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,1.0,1.0,0.9332925514808473,0.8453744755969357 +How do rivals target cost-conscious readers and compete with Amazon?,global search+vector+fulltext,"Rivals target cost-conscious readers and compete with Amazon by employing strategies such as cost leadership and differentiation. For example, eBay utilizes both cost leadership and differentiation strategies to attract price-sensitive customers. Barnes and Noble integrates a focused cost leadership with differentiation approach to cater to readers looking for quality products at competitive prices. Wal-Mart, as the world's largest retail chain, focuses on cost leadership to offer affordable options to cost-conscious readers. These strategies help rivals position themselves effectively in the market and compete with Amazon's offerings.","[""Document start\nThis Document belongs to the source unknown\nContent: Amazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, being regulated by the Federal Trade Commission and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon is a rapidly growing multinational e-commerce retailer based in America with a significant international presence. It faces competition from major organizations such as eBay, Barnes and Noble, and Wal-Mart. 
eBay and Barnes and Noble are noted for their product breadth and geographic market coverage, while Wal-Mart is recognized as the world's largest retail chain, employing a cost leadership strategy.\n\nAmazon competes with eBay, which utilizes both cost leadership and differentiation strategies. Barnes and Noble also competes with Amazon, employing an integrated focused cost leadership/differentiation strategy. Wal-Mart, another competitor, uses a cost leadership approach.\n\nAmazon is actively investing in various technologies and products, including video content, cloud services, and its Kindle product. It utilizes technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations. The company has also released a smartphone app and is located at the U.S. East Data Center.\n\nAmazon sees opportunities in emerging markets like China, which has a large population and growing disposable income. It is focused on capturing the global market and has invested in distribution centers to support this goal. The company discloses information about online sales taxes and is regulated by the Federal Trade Commission. Additionally, it receives support from the International Consumer Protection and Enforcement Network.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. 
It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offers a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nAmazon, a leading American multinational e-commerce retailer, is experiencing rapid growth and has established a significant international presence. It competes with major organizations such as eBay, Barnes and Noble, and Wal-Mart. Each competitor employs distinct strategies: eBay uses both cost leadership and differentiation, Barnes and Noble integrates focused cost leadership with differentiation, and Wal-Mart, the world's largest retail chain, focuses on cost leadership.\n\nAmazon is heavily investing in various technologies and products, including video content, cloud services, and its Kindle product. It leverages technologies like Elastic Block Storage (EBS) and 1-Click Ordering to enhance its operations and has developed a smartphone app. 
The company is strategically located at the U.S. East Data Center and is actively pursuing opportunities in emerging markets like China, which offer a large population and increasing disposable income. To support its global market capture, Amazon has invested in distribution centers and is transparent about online sales taxes, regulated by the Federal Trade Commission, and supported by the International Consumer Protection and Enforcement Network.\n\nOnline shopping, a central activity in e-commerce, is particularly popular among Generation X and Generation Y, with Baby Boomers participating less. The growth of online shopping is driven by the Internet, modern tech support, web security, and smartphones, which enhance the shopping experience. Online payment methods further increase convenience and efficiency. E-commerce acts as a catalyst for this growth, with emerging markets showing significant potential. Looking ahead, 3D virtual technology is anticipated to transform the online shopping experience, offering new dimensions to consumer engagement.\n----\nThe community consists of a group of individuals, Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht, who are enrolled in Bus 478, a seminar focused on business strategy. This course involves a collaborative or team-based approach, suggesting that these members work closely together on projects or discussions related to business strategy. Additionally, the community is linked to Amazon.com, a company founded in 1993 by Jeff Benzos. Originally an online bookstore, Amazon has evolved into a vast retail enterprise valued at $48 billion, offering a wide range of products across more than forty categories. The company saw substantial growth during the Dot-Com Bubble, with Jeff Benzos playing a crucial role in its expansion and success. 
This connection to Amazon may indicate that the course or group discussions could involve case studies or strategic analyses of Amazon's business model and growth trajectory.\n----\nAmazon.com, founded in 1993 by Jeff Benzos, initially started as an online bookstore. Over the years, it has grown into a massive retail enterprise valued at $48 billion, offering products across more than forty categories. The company experienced significant growth momentum during the Dot-Com Bubble. Jeff Benzos, the founder, played a pivotal role in its development and expansion.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nThe community consists of individuals and entities connected through educational and business endeavors. Jin Can Chen, Jae Kim, Jessica Zhang, John Chen, and Tristan Landrecht are all participants in Bus 478, a seminar focused on business strategy, which emphasizes teamwork and collaboration. 
This educational setting likely fosters skills relevant to business growth and strategy, akin to those demonstrated by Amazon.com. Founded in 1993 by Jeff Benzos, Amazon began as an online bookstore and evolved into a vast retail empire valued at $48 billion, offering a diverse range of products across more than forty categories. The company's significant expansion, particularly during the Dot-Com Bubble, highlights the importance of strategic business planning and leadership, as exemplified by Benzos' pivotal role in its success. This connection between the educational focus of Bus 478 and the business strategies employed by Amazon underscores the relevance of strategic thinking in both academic and real-world business contexts.\n----\nGeneration X and Y, recognized for their proficiency with technology, are demographic cohorts that frequently participate in online shopping activities. These groups are characterized by their comfort and familiarity with digital platforms, which facilitates their engagement in e-commerce. Their technological adeptness not only influences their purchasing habits but also reflects broader trends in consumer behavior, where digital literacy plays a crucial role in shaping market dynamics. As active participants in the online marketplace, Generation X and Y contribute significantly to the growth and evolution of e-commerce, highlighting the importance of targeting these demographics in digital marketing strategies.\n----\nGeneration X and Y, recognized for their proficiency with technology, are demographic cohorts that frequently participate in online shopping activities. These groups are characterized by their comfort and familiarity with digital platforms, which facilitates their engagement in e-commerce. 
Their technological adeptness not only influences their purchasing habits but also reflects broader trends in consumer behavior, where digital literacy plays a crucial role in shaping how and where people choose to shop.\nDocument end\n""]","Rivals target cost-conscious readers and compete with Amazon by pursuing an integrated focused cost leadership/differentiation strategy. They provide books that are not readily or commonly available at a less expensive price. They aim to increase profitability, gain market share, and provide a wide variety of products to compete against Amazon through utilizing competitive pricing, cost leadership, and differentiation strategies. Some of them have an international presence with stable financial performance, enabling them to form global strategic alliances to increase their market base of customers.",reasoning,0.9999999999,0.5,0.5454545454545454,0.9332912351327914,0.48836403633073866 diff --git a/experiments/modes_comparison_RAGAS.ipynb b/experiments/modes_comparison_RAGAS.ipynb new file mode 100644 index 000000000..bcc604319 --- /dev/null +++ b/experiments/modes_comparison_RAGAS.ipynb @@ -0,0 +1,3442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Objective : To evaluate different retrievers used in llm-graph-builder application using RAGAS library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Installation \n", + "#Other packages can be installed from requirement.txt\n", + "\n", + "!pip install ragas==0.1.14 seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pre requisites to run this notebook\n", + "A pdf file to be kept in files folder and same has to be uploaded in llm-graph-builder application" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/workspaces/llm-graph-builder/backend/src/shared/common_fn.py:89: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the langchain-huggingface package and should be used instead. To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFaceEmbeddings`.\n", + " embeddings = SentenceTransformerEmbeddings(\n", + "/opt/conda/envs/myenv310/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from tqdm.autonotebook import tqdm, trange\n" + ] + } + ], + "source": [ + "#importing all necessary packages\n", + "#keeping this notebook in experiements folder \n", + "import sys\n", + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from dotenv import load_dotenv\n", + "sys.path.append(os.path.abspath('../backend'))\n", + "from src.llm import get_llm\n", + "from src.QA_integration import QA_RAG, create_neo4j_chat_message_history,get_chat_mode_settings,get_neo4j_retriever,create_document_retriever_chain,retrieve_documents,format_documents,clear_chat_history\n", + "from langchain_core.messages import HumanMessage\n", + "from langchain_community.graphs import Neo4jGraph\n", + "from langchain_community.document_loaders import DirectoryLoader\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from datasets import Dataset, Features, Sequence, Value\n", + "from ragas.testset.generator import TestsetGenerator\n", + "from ragas.testset.evolutions import simple, reasoning, multi_context" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": 
[ + "Python version : 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]\n", + "Ragas version : 0.1.14\n", + "langchain version : 0.2.14\n" + ] + } + ], + "source": [ + "#Check version of libraries installed as it is crucial to have specific version to run the code in codespace\n", + "### Python version : 3.10.14\n", + "### Ragas version : 0.1.14\n", + "### langchain version : 0.2.14\n", + "import sys\n", + "import ragas\n", + "import langchain\n", + "print(\"Python version : \",sys.version)\n", + "print(\"Ragas version : \", ragas.__version__)\n", + "print(\"langchain version : \", langchain.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#Loading environmental variables\n", + "load_dotenv()\n", + "\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "uri = os.getenv(\"NEO4J_URI\")\n", + "userName = os.getenv(\"NEO4J_USERNAME\")\n", + "password = os.getenv(\"NEO4J_PASSWORD\")\n", + "database = os.getenv(\"NEO4J_DATABASE\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/myenv310/lib/python3.10/site-packages/pypdf/_crypt_providers/_cryptography.py:32: CryptographyDeprecationWarning: ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from this module in 48.0.0.\n", + " from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4\n" + ] + } + ], + "source": [ + "#Loading file kept in files folder through langchain loader\n", + "# Here we are using \"About Amazon.pdf\"\n", + "loader = DirectoryLoader(\"files\")\n", + "documents = loader.load() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If above code gives error in codespace then run following commands in terminal \n", + "\n", + "sudo apt-get update\n", + "\n", + "sudo apt-get install -y libgl1-mesa-glx \n", + 
"\n", + "pip install opencv-python\n", + "\n", + "pip install typing \n", + "\n", + "then restart the kernel" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:ragas.testset.docstore:Filename and doc_id are the same for all nodes.\n", + "Generating: 100%|██████████| 15/15 [03:48<00:00, 15.21s/it]\n" + ] + } + ], + "source": [ + "#Generating synthetic test set through RAGAS library\n", + "\n", + "generator_llm = ChatOpenAI(model=\"gpt-4\")\n", + "critic_llm = ChatOpenAI(model=\"gpt-4\")\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "generator = TestsetGenerator.from_langchain(\n", + " generator_llm,\n", + " critic_llm,\n", + " embeddings\n", + ")\n", + "\n", + "try: \n", + " testset = generator.generate_with_langchain_docs(documents, test_size=15, distributions={simple:0.3, reasoning: 0.3, multi_context: 0.4})\n", + "except Exception as e:\n", + " print(\"Error: \",e)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    questioncontextsground_truthevolution_typemetadataepisode_done
    0What issues did Amazon's Elastic Block Storage...[�s largest book retailer operating both onlin...In 2012, Amazon's Elastic Block Storage (EBS) ...simple[{'source': 'files/About Amazon.pdf'}]True
    1How does the integrated focused cost leadershi...[�s largest book retailer operating both onlin...The integrated focused cost leadership/differe...simple[{'source': 'files/About Amazon.pdf'}]True
    2What is the competitive strategy of Barnes and...[ online advertising and the security of consu...The answer to given question is not present in...simple[{'source': 'files/About Amazon.pdf'}]True
    3Why should online retailers like Amazon be car...[ online advertising and the security of consu...Online retailers like Amazon should be careful...simple[{'source': 'files/About Amazon.pdf'}]True
    4How threatening are substitute products to Ama...[ online advertising and the security of consu...The threat of substitutes for Amazon is high. ...reasoning[{'source': 'files/About Amazon.pdf'}]True
    \n", + "
    " + ], + "text/plain": [ + " question \\\n", + "0 What issues did Amazon's Elastic Block Storage... \n", + "1 How does the integrated focused cost leadershi... \n", + "2 What is the competitive strategy of Barnes and... \n", + "3 Why should online retailers like Amazon be car... \n", + "4 How threatening are substitute products to Ama... \n", + "\n", + " contexts \\\n", + "0 [�s largest book retailer operating both onlin... \n", + "1 [�s largest book retailer operating both onlin... \n", + "2 [ online advertising and the security of consu... \n", + "3 [ online advertising and the security of consu... \n", + "4 [ online advertising and the security of consu... \n", + "\n", + " ground_truth evolution_type \\\n", + "0 In 2012, Amazon's Elastic Block Storage (EBS) ... simple \n", + "1 The integrated focused cost leadership/differe... simple \n", + "2 The answer to given question is not present in... simple \n", + "3 Online retailers like Amazon should be careful... simple \n", + "4 The threat of substitutes for Amazon is high. ... 
reasoning \n", + "\n", + " metadata episode_done \n", + "0 [{'source': 'files/About Amazon.pdf'}] True \n", + "1 [{'source': 'files/About Amazon.pdf'}] True \n", + "2 [{'source': 'files/About Amazon.pdf'}] True \n", + "3 [{'source': 'files/About Amazon.pdf'}] True \n", + "4 [{'source': 'files/About Amazon.pdf'}] True " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Creating dataframe from generated test set\n", + "\n", + "generated_testdf = testset.to_pandas()\n", + "generated_testdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?\",\n", + " 'How does the integrated focused cost leadership/differentiation strategy help competitors compete with Amazon in the book industry?',\n", + " 'What is the competitive strategy of Barnes and Noble in the e-commerce industry?',\n", + " 'Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?',\n", + " \"How threatening are substitute products to Amazon's patented tech and customer service?\",\n", + " \"What was the 2012 issue with Amazon's EBS and its proposed fix?\",\n", + " \"What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?\",\n", + " 'What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?',\n", + " \"How has Amazon's M&A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by competition?\",\n", + " \"What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?\",\n", + " 'What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?',\n", + " \"What strategies do Amazon's book competitors use to attract 
price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?\",\n", + " \"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?\",\n", + " \"What were the 2012 issues with Amazon's EBS at the U.S. East center and their impact on customer service and future investments?\",\n", + " 'How do rivals target cost-conscious readers and compete with Amazon?']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generated_testdf['question'].to_list()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Questions are of following types\n", + "\n", + "Simple : Straight forward question from the context\n", + "\n", + "Reasoning: Rewrite the question in a way that enhances the need for reasoning to answer it effectively.\n", + "\n", + "Multi-Context: Rephrase the question in a manner that necessitates information from multiple related sections or chunks to formulate an answer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "evolution_type\n", + "multi_context 6\n", + "reasoning 5\n", + "simple 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generated_testdf['evolution_type'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "#Optional - Saving generated testset for further use\n", + "\n", + "generated_testdf.to_csv(\"RAGAS_testset.csv\",index= False)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "#Optional if you are re running code you can start from here\n", + "#generated_testdf = pd.read_csv(\"RAGAS_testset.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we will call our QnA bot to answer same questions that are generated through test set" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def create_graph_database_connection(uri, userName, password, database):\n", + " enable_user_agent = os.environ.get(\"ENABLE_USER_AGENT\", \"False\").lower() in (\"true\", \"1\", \"yes\")\n", + " if enable_user_agent:\n", + " graph = Neo4jGraph(url=uri, database=database, username=userName, password=password, refresh_schema=False, sanitize=True,driver_config={'user_agent':os.environ.get('NEO4J_USER_AGENT')}) \n", + " else:\n", + " graph = Neo4jGraph(url=uri, database=database, username=userName, password=password, refresh_schema=False, sanitize=True) \n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#Graph connection\n", + "graph = create_graph_database_connection(uri, userName, password, database)\n", + "\n", + "#Chat modes\n", + 
"CHAT_VECTOR_MODE = \"vector\"\n", + "CHAT_FULLTEXT_MODE = \"fulltext\"\n", + "CHAT_ENTITY_VECTOR_MODE = \"entity search+vector\"\n", + "CHAT_VECTOR_GRAPH_MODE = \"graph+vector\"\n", + "CHAT_VECTOR_GRAPH_FULLTEXT_MODE = \"graph+vector+fulltext\"\n", + "CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = \"global search+vector+fulltext\"\n", + "\n", + "Modes = [CHAT_VECTOR_MODE,CHAT_FULLTEXT_MODE,CHAT_ENTITY_VECTOR_MODE,CHAT_VECTOR_GRAPH_MODE,CHAT_VECTOR_GRAPH_FULLTEXT_MODE,CHAT_GLOBAL_VECTOR_FULLTEXT_MODE]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"What issues did Amazon's Elastic Block Storage (EBS) service face in 2012 and how did it affect the company?\",\n", + " 'How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?',\n", + " 'What is the competitive strategy of Barnes and Noble in the e-commerce industry?',\n", + " 'Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?',\n", + " \"How threatening are substitute products to Amazon's patented tech and customer service?\",\n", + " \"What was the 2012 issue with Amazon's EBS and its proposed fix?\",\n", + " \"What strategy led to Amazon's growth from a small bookstore to a $48B retail giant?\",\n", + " 'What strategic shift helped Amazon evolve from a small bookstore to a $48B retail giant?',\n", + " \"How has Amazon's M and A strategy, transitioning from an online bookstore to a multi-category platform, been shaped by competition?\",\n", + " \"What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?\",\n", + " 'What methods does ICPEN use for cross-border online consumer issues and its effect on retailers like Amazon?',\n", + " \"What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and 
unique, cheap books?\",\n", + " \"What strategies are Amazon's book rivals using for price-sensitive customers and market growth, and its impact on Amazon's long-term investment focus?\",\n", + " \"What were the 2012 issues with Amazon's EBS at the U.S. East center and their impact on customer service and future investments?\",\n", + " 'How do rivals target cost-conscious readers and compete with Amazon?']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Creating list of questions from generated testset\n", + "Questions = generated_testdf['question'].to_list()\n", + "Questions" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def fetch_context(mode,session_id,question):\n", + " history = create_neo4j_chat_message_history(graph, session_id)\n", + " messages = history.messages\n", + " user_question = HumanMessage(content=question)\n", + " messages.append(user_question)\n", + " model = 'openai-gpt-3.5'\n", + " document_names = '[]'\n", + " llm, model_name = get_llm(model=model)\n", + " chat_mode_settings = get_chat_mode_settings(mode=mode)\n", + " retriever = get_neo4j_retriever(graph=graph, chat_mode_settings=chat_mode_settings, document_names=document_names)\n", + " doc_retriever = create_document_retriever_chain(llm, retriever)\n", + " docs = retrieve_documents(doc_retriever, messages) \n", + " context = format_documents(docs, model)\n", + " return context[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def test_chatbot_qna(uri, userName, password,database, model_name, mode, question):\n", + " \"\"\"Test chatbot QnA functionality for different modes.\"\"\"\n", + " print(question,mode)\n", + " session_id = int(Modes.index(mode)+1)\n", + " graph = create_graph_database_connection(uri, userName, password, database) \n", + " clear_chat_history(graph, session_id)\n", + " 
QA_n_RAG = QA_RAG(graph, model_name, question, '[]', session_id, mode)\n", + " generated_context = fetch_context(mode,session_id,question)\n", + " try:\n", + " assert len(QA_n_RAG['message']) > 20\n", + " return QA_n_RAG['message'],generated_context\n", + " except AssertionError as e:\n", + " print(\"Failed for mode : \",mode,\" session id :\",session_id, \"Error : \", e)\n", + " return QA_n_RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "def process_qna_pair(question, mode): \n", + " answer,generated_context = test_chatbot_qna(uri, userName, password,database,model_name='openai-gpt-3.5', mode=mode, question=question)\n", + " return question, mode, answer,generated_context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#This cell may take few minutes based on no of question and modes\n", + "\n", + "results = []\n", + "results = [process_qna_pair(question, mode) for question in Questions for mode in Modes]" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    questionmodeanswerapplication_context
    0What issues did Amazon's Elastic Block Storage...vectorAmazon's Elastic Block Storage (EBS) service f...Document start\\nThis Document belongs to the s...
    1What issues did Amazon's Elastic Block Storage...fulltextAmazon's Elastic Block Storage (EBS) service f...Document start\\nThis Document belongs to the s...
    2What issues did Amazon's Elastic Block Storage...entity search+vectorIn 2012, Amazon's Elastic Block Storage (EBS) ...Document start\\nThis Document belongs to the s...
    3What issues did Amazon's Elastic Block Storage...graph+vectorAmazon's Elastic Block Storage (EBS) service f...Document start\\nThis Document belongs to the s...
    4What issues did Amazon's Elastic Block Storage...graph+vector+fulltextAmazon's Elastic Block Storage (EBS) service f...Document start\\nThis Document belongs to the s...
    \n", + "
    " + ], + "text/plain": [ + " question mode \\\n", + "0 What issues did Amazon's Elastic Block Storage... vector \n", + "1 What issues did Amazon's Elastic Block Storage... fulltext \n", + "2 What issues did Amazon's Elastic Block Storage... entity search+vector \n", + "3 What issues did Amazon's Elastic Block Storage... graph+vector \n", + "4 What issues did Amazon's Elastic Block Storage... graph+vector+fulltext \n", + "\n", + " answer \\\n", + "0 Amazon's Elastic Block Storage (EBS) service f... \n", + "1 Amazon's Elastic Block Storage (EBS) service f... \n", + "2 In 2012, Amazon's Elastic Block Storage (EBS) ... \n", + "3 Amazon's Elastic Block Storage (EBS) service f... \n", + "4 Amazon's Elastic Block Storage (EBS) service f... \n", + "\n", + " application_context \n", + "0 Document start\\nThis Document belongs to the s... \n", + "1 Document start\\nThis Document belongs to the s... \n", + "2 Document start\\nThis Document belongs to the s... \n", + "3 Document start\\nThis Document belongs to the s... \n", + "4 Document start\\nThis Document belongs to the s... " + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retrieved_df = pd.DataFrame(results, columns=['question', 'mode', 'answer','application_context'])\n", + "retrieved_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some of the asnwers given by bot are not appropriate" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    questionmodeanswerapplication_context
    5What issues did Amazon's Elastic Block Storage...global search+vector+fulltextI don't have that specific information availab...Document start\\nThis Document belongs to the s...
    35What was the 2012 issue with Amazon's EBS and ...global search+vector+fulltextI don't have that information right now. Is th...Document start\\nThis Document belongs to the s...
    80What were the 2012 issues with Amazon's EBS at...entity search+vectorI don't have specific information regarding th...Document start\\nThis Document belongs to the s...
    83What were the 2012 issues with Amazon's EBS at...global search+vector+fulltextI don't have that information right now. Would...Document start\\nThis Document belongs to the s...
    \n", + "
    " + ], + "text/plain": [ + " question \\\n", + "5 What issues did Amazon's Elastic Block Storage... \n", + "35 What was the 2012 issue with Amazon's EBS and ... \n", + "80 What were the 2012 issues with Amazon's EBS at... \n", + "83 What were the 2012 issues with Amazon's EBS at... \n", + "\n", + " mode \\\n", + "5 global search+vector+fulltext \n", + "35 global search+vector+fulltext \n", + "80 entity search+vector \n", + "83 global search+vector+fulltext \n", + "\n", + " answer \\\n", + "5 I don't have that specific information availab... \n", + "35 I don't have that information right now. Is th... \n", + "80 I don't have specific information regarding th... \n", + "83 I don't have that information right now. Would... \n", + "\n", + " application_context \n", + "5 Document start\\nThis Document belongs to the s... \n", + "35 Document start\\nThis Document belongs to the s... \n", + "80 Document start\\nThis Document belongs to the s... \n", + "83 Document start\\nThis Document belongs to the s... 
" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retrieved_df[retrieved_df['answer'].str.lower().str.startswith(\"i don't have\")]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Important : Above result shows certain questions were not answered by QnA bot.\n", + "\n", + "#Further analysis needs to be done here" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(86, 4)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#We will only consider proper answers for analysis\n", + "\n", + "retrieved_df = retrieved_df[~retrieved_df['answer'].str.lower().str.startswith(\"i don't have\")]\n", + "retrieved_df.to_csv('chatbot_answers.csv', index=False)\n", + "retrieved_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "#Optional if you are running code from here you can uncomment\n", + "\n", + "# retrieved_df = pd.read_csv(\"chatbot_answers.csv\")\n", + "# retrieved_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "generated_testdf = generated_testdf.rename(columns={'contexts':'RAGAS_context'})" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated test set columns : Index(['question', 'RAGAS_context', 'ground_truth', 'evolution_type',\n", + " 'metadata', 'episode_done'],\n", + " dtype='object')\n", + "QnA Bot Retrieved set columns: Index(['question', 'mode', 'answer', 'application_context'], dtype='object')\n" + ] + } + ], + "source": [ + "print(\"Generated test set columns : \",generated_testdf.columns)\n", + "print(\"QnA Bot Retrieved set columns: \",retrieved_df.columns)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(86, 9)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df = pd.merge(retrieved_df,generated_testdf,on='question',how='left')\n", + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "#optional code if next cell is giving error as \"ValueError: Dataset feature \"contexts\" should be of type Sequence[string]\"\n", + "\n", + "# final_df['contexts'] = final_df['contexts'].apply(lambda x:[x] if isinstance(x,str) else x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### EVALUATION" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['question', 'mode', 'answer', 'contexts', 'ground_truth', 'question_complexity'],\n", + " num_rows: 86\n", + "})" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Creating dataset for evaluation\n", + "\n", + "data_samples = {\n", + " \n", + " 'question': final_df['question'].tolist(),\n", + " 'mode':final_df['mode'].tolist(),\n", + " 'answer': final_df['answer'].tolist(),\n", + " 'contexts': final_df['application_context'].tolist(),\n", + " 'ground_truth': final_df['ground_truth'].tolist(),\n", + " 'question_complexity' : final_df['evolution_type'].tolist()\n", + "}\n", + "features = Features({\n", + " 'question': Value('string'),\n", + " 'mode': Value('string'),\n", + " 'answer': Value('string'),\n", + " 'contexts': Value('string'), \n", + " 'ground_truth': Value('string'),\n", + " 'question_complexity': Value('string')\n", + "})\n", + "dataset = Dataset.from_dict(data_samples, features=features)\n", + "dataset" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Converting context into list of strings as it is required in next step\n", + "\n", + "def preprocess_function(example):\n", + " example[\"contexts\"] = [example[\"contexts\"]]\n", + " return example\n", + "\n", + "dataset = dataset.map(preprocess_function)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 430/430 [03:23<00:00, 2.11it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    questionmodeanswercontextsground_truthquestion_complexitycontext_precisioncontext_recallfaithfulnessanswer_relevancyanswer_correctness
    0What issues did Amazon's Elastic Block Storage...vectorAmazon's Elastic Block Storage (EBS) service f...[Document start\\nThis Document belongs to the ...In 2012, Amazon's Elastic Block Storage (EBS) ...simple1.01.00.7500.9240690.769896
    1What issues did Amazon's Elastic Block Storage...fulltextAmazon's Elastic Block Storage (EBS) service f...[Document start\\nThis Document belongs to the ...In 2012, Amazon's Elastic Block Storage (EBS) ...simple1.01.00.6250.9240260.797546
    2What issues did Amazon's Elastic Block Storage...entity search+vectorIn 2012, Amazon's Elastic Block Storage (EBS) ...[Document start\\nThis Document belongs to the ...In 2012, Amazon's Elastic Block Storage (EBS) ...simple0.00.00.0000.9507040.733410
    3What issues did Amazon's Elastic Block Storage...graph+vectorAmazon's Elastic Block Storage (EBS) service f...[Document start\\nThis Document belongs to the ...In 2012, Amazon's Elastic Block Storage (EBS) ...simple1.01.01.0000.9240690.845223
    4What issues did Amazon's Elastic Block Storage...graph+vector+fulltextAmazon's Elastic Block Storage (EBS) service f...[Document start\\nThis Document belongs to the ...In 2012, Amazon's Elastic Block Storage (EBS) ...simple1.01.01.0000.9266900.844968
    \n", + "
    " + ], + "text/plain": [ + " question mode \\\n", + "0 What issues did Amazon's Elastic Block Storage... vector \n", + "1 What issues did Amazon's Elastic Block Storage... fulltext \n", + "2 What issues did Amazon's Elastic Block Storage... entity search+vector \n", + "3 What issues did Amazon's Elastic Block Storage... graph+vector \n", + "4 What issues did Amazon's Elastic Block Storage... graph+vector+fulltext \n", + "\n", + " answer \\\n", + "0 Amazon's Elastic Block Storage (EBS) service f... \n", + "1 Amazon's Elastic Block Storage (EBS) service f... \n", + "2 In 2012, Amazon's Elastic Block Storage (EBS) ... \n", + "3 Amazon's Elastic Block Storage (EBS) service f... \n", + "4 Amazon's Elastic Block Storage (EBS) service f... \n", + "\n", + " contexts \\\n", + "0 [Document start\\nThis Document belongs to the ... \n", + "1 [Document start\\nThis Document belongs to the ... \n", + "2 [Document start\\nThis Document belongs to the ... \n", + "3 [Document start\\nThis Document belongs to the ... \n", + "4 [Document start\\nThis Document belongs to the ... \n", + "\n", + " ground_truth question_complexity \\\n", + "0 In 2012, Amazon's Elastic Block Storage (EBS) ... simple \n", + "1 In 2012, Amazon's Elastic Block Storage (EBS) ... simple \n", + "2 In 2012, Amazon's Elastic Block Storage (EBS) ... simple \n", + "3 In 2012, Amazon's Elastic Block Storage (EBS) ... simple \n", + "4 In 2012, Amazon's Elastic Block Storage (EBS) ... 
simple \n", + "\n", + " context_precision context_recall faithfulness answer_relevancy \\\n", + "0 1.0 1.0 0.750 0.924069 \n", + "1 1.0 1.0 0.625 0.924026 \n", + "2 0.0 0.0 0.000 0.950704 \n", + "3 1.0 1.0 1.000 0.924069 \n", + "4 1.0 1.0 1.000 0.926690 \n", + "\n", + " answer_correctness \n", + "0 0.769896 \n", + "1 0.797546 \n", + "2 0.733410 \n", + "3 0.845223 \n", + "4 0.844968 " + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Evaluating using RAGAS\n", + "\n", + "from ragas.metrics import (\n", + " answer_relevancy,\n", + " faithfulness,\n", + " context_recall,\n", + " context_precision,\n", + " answer_correctness\n", + ")\n", + "\n", + "from ragas import evaluate\n", + "\n", + "result = evaluate(\n", + " dataset,\n", + " metrics=[\n", + " context_precision,\n", + " context_recall,\n", + " faithfulness,\n", + " answer_relevancy,\n", + " answer_correctness,\n", + " ]\n", + ")\n", + "\n", + "eval_df = result.to_pandas()\n", + "eval_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "eval_df.to_csv(\"Final_Score.csv\",index= False)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['question', 'mode', 'answer', 'contexts', 'ground_truth',\n", + " 'question_complexity', 'context_precision', 'context_recall',\n", + " 'faithfulness', 'answer_relevancy', 'answer_correctness'],\n", + " dtype='object')" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitycontext_precisioncontext_recall...answer_relevancyanswer_correctness
    countmeanminmaxstdcountmeanmin...countmeanminmaxstdcountmeanminmaxstd
    0entity search+vectormulti_context50.8000000.01.00.44721450.5619050.142857...50.9442880.8978610.9700960.02812350.7424960.5234710.9922690.170004
    1entity search+vectorreasoning50.8000000.01.00.44721450.2500000.000000...50.9672520.9534660.9854860.01173550.8035360.5657110.9436290.147004
    2entity search+vectorsimple40.5000000.01.00.57735040.5000000.000000...40.9767750.9507041.0000000.02059640.5377660.1726750.7334100.251963
    3fulltextmulti_context60.8333330.01.00.40824860.9523810.714286...60.9398130.9308140.9553020.00961560.8033990.4373950.9955870.208676
    4fulltextreasoning51.0000001.01.00.00000051.0000001.000000...50.9549650.9348950.9742460.01438750.8533490.6190320.9967040.187401
    5fulltextsimple41.0000001.01.00.00000041.0000001.000000...40.9692420.9240261.0000000.03223140.6723180.1753730.9401640.339199
    6global search+vector+fulltextmulti_context50.8000000.01.00.44721450.5119050.142857...50.9340880.9003930.9677280.02959950.7355390.6095250.9091840.123181
    7global search+vector+fulltextreasoning40.5000000.01.00.57735040.1250000.000000...40.9537060.9332910.9667640.01433240.7278430.4883640.8461430.162029
    8global search+vector+fulltextsimple30.6666670.01.00.57735030.6666670.000000...30.9753290.9732300.9788110.00303730.3945940.1721250.6658700.250464
    9graph+vectormulti_context60.8333330.01.00.40824860.9523810.714286...60.9302910.8978610.9579430.02641060.7421800.4913980.9181700.188076
    10graph+vectorreasoning51.0000001.01.00.00000051.0000001.000000...50.7647480.0000000.9743050.42774550.8584120.6700620.9965560.137474
    11graph+vectorsimple41.0000001.01.00.00000041.0000001.000000...40.9446750.9144510.9894750.03357340.5594330.1749660.8452230.300916
    12graph+vector+fulltextmulti_context60.8333330.01.00.40824860.9523810.714286...60.9428060.9227840.9553020.01330660.7367840.4372160.9902460.237939
    13graph+vector+fulltextreasoning51.0000001.01.00.00000051.0000001.000000...50.9352750.8926180.9620120.02679650.7869990.4557120.9466830.190876
    14graph+vector+fulltextsimple41.0000001.01.00.00000041.0000001.000000...40.9723420.9266900.9999980.03235940.6436410.1750820.9343100.339174
    15vectormulti_context60.8333330.01.00.40824860.9107140.714286...60.9352970.9149590.9553380.01368860.8133150.5442150.9912020.156961
    16vectorreasoning51.0000001.01.00.00000051.0000001.000000...50.9567080.9348950.9855040.01836450.6771240.4754650.8232990.126854
    17vectorsimple40.7500000.01.00.50000041.0000001.000000...40.9686940.9240690.9977440.03150540.5845350.1753240.8178120.292301
    \n", + "

    18 rows × 27 columns

    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity context_precision \\\n", + " count \n", + "0 entity search+vector multi_context 5 \n", + "1 entity search+vector reasoning 5 \n", + "2 entity search+vector simple 4 \n", + "3 fulltext multi_context 6 \n", + "4 fulltext reasoning 5 \n", + "5 fulltext simple 4 \n", + "6 global search+vector+fulltext multi_context 5 \n", + "7 global search+vector+fulltext reasoning 4 \n", + "8 global search+vector+fulltext simple 3 \n", + "9 graph+vector multi_context 6 \n", + "10 graph+vector reasoning 5 \n", + "11 graph+vector simple 4 \n", + "12 graph+vector+fulltext multi_context 6 \n", + "13 graph+vector+fulltext reasoning 5 \n", + "14 graph+vector+fulltext simple 4 \n", + "15 vector multi_context 6 \n", + "16 vector reasoning 5 \n", + "17 vector simple 4 \n", + "\n", + " context_recall ... \\\n", + " mean min max std count mean min ... \n", + "0 0.800000 0.0 1.0 0.447214 5 0.561905 0.142857 ... \n", + "1 0.800000 0.0 1.0 0.447214 5 0.250000 0.000000 ... \n", + "2 0.500000 0.0 1.0 0.577350 4 0.500000 0.000000 ... \n", + "3 0.833333 0.0 1.0 0.408248 6 0.952381 0.714286 ... \n", + "4 1.000000 1.0 1.0 0.000000 5 1.000000 1.000000 ... \n", + "5 1.000000 1.0 1.0 0.000000 4 1.000000 1.000000 ... \n", + "6 0.800000 0.0 1.0 0.447214 5 0.511905 0.142857 ... \n", + "7 0.500000 0.0 1.0 0.577350 4 0.125000 0.000000 ... \n", + "8 0.666667 0.0 1.0 0.577350 3 0.666667 0.000000 ... \n", + "9 0.833333 0.0 1.0 0.408248 6 0.952381 0.714286 ... \n", + "10 1.000000 1.0 1.0 0.000000 5 1.000000 1.000000 ... \n", + "11 1.000000 1.0 1.0 0.000000 4 1.000000 1.000000 ... \n", + "12 0.833333 0.0 1.0 0.408248 6 0.952381 0.714286 ... \n", + "13 1.000000 1.0 1.0 0.000000 5 1.000000 1.000000 ... \n", + "14 1.000000 1.0 1.0 0.000000 4 1.000000 1.000000 ... \n", + "15 0.833333 0.0 1.0 0.408248 6 0.910714 0.714286 ... \n", + "16 1.000000 1.0 1.0 0.000000 5 1.000000 1.000000 ... \n", + "17 0.750000 0.0 1.0 0.500000 4 1.000000 1.000000 ... 
\n", + "\n", + " answer_relevancy \\\n", + " count mean min max std \n", + "0 5 0.944288 0.897861 0.970096 0.028123 \n", + "1 5 0.967252 0.953466 0.985486 0.011735 \n", + "2 4 0.976775 0.950704 1.000000 0.020596 \n", + "3 6 0.939813 0.930814 0.955302 0.009615 \n", + "4 5 0.954965 0.934895 0.974246 0.014387 \n", + "5 4 0.969242 0.924026 1.000000 0.032231 \n", + "6 5 0.934088 0.900393 0.967728 0.029599 \n", + "7 4 0.953706 0.933291 0.966764 0.014332 \n", + "8 3 0.975329 0.973230 0.978811 0.003037 \n", + "9 6 0.930291 0.897861 0.957943 0.026410 \n", + "10 5 0.764748 0.000000 0.974305 0.427745 \n", + "11 4 0.944675 0.914451 0.989475 0.033573 \n", + "12 6 0.942806 0.922784 0.955302 0.013306 \n", + "13 5 0.935275 0.892618 0.962012 0.026796 \n", + "14 4 0.972342 0.926690 0.999998 0.032359 \n", + "15 6 0.935297 0.914959 0.955338 0.013688 \n", + "16 5 0.956708 0.934895 0.985504 0.018364 \n", + "17 4 0.968694 0.924069 0.997744 0.031505 \n", + "\n", + " answer_correctness \n", + " count mean min max std \n", + "0 5 0.742496 0.523471 0.992269 0.170004 \n", + "1 5 0.803536 0.565711 0.943629 0.147004 \n", + "2 4 0.537766 0.172675 0.733410 0.251963 \n", + "3 6 0.803399 0.437395 0.995587 0.208676 \n", + "4 5 0.853349 0.619032 0.996704 0.187401 \n", + "5 4 0.672318 0.175373 0.940164 0.339199 \n", + "6 5 0.735539 0.609525 0.909184 0.123181 \n", + "7 4 0.727843 0.488364 0.846143 0.162029 \n", + "8 3 0.394594 0.172125 0.665870 0.250464 \n", + "9 6 0.742180 0.491398 0.918170 0.188076 \n", + "10 5 0.858412 0.670062 0.996556 0.137474 \n", + "11 4 0.559433 0.174966 0.845223 0.300916 \n", + "12 6 0.736784 0.437216 0.990246 0.237939 \n", + "13 5 0.786999 0.455712 0.946683 0.190876 \n", + "14 4 0.643641 0.175082 0.934310 0.339174 \n", + "15 6 0.813315 0.544215 0.991202 0.156961 \n", + "16 5 0.677124 0.475465 0.823299 0.126854 \n", + "17 4 0.584535 0.175324 0.817812 0.292301 \n", + "\n", + "[18 rows x 27 columns]" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "visual_df = eval_df.groupby(['mode','question_complexity'])[['context_precision','context_recall','faithfulness', 'answer_relevancy','answer_correctness']].agg(['count','mean','min','max','std']).reset_index()\n", + "visual_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysing each metric" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Faithfulness\n", + "\n", + "This measures the factual consistency of the generated answer against the given context." + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitymeanminmaxstd
    0entity search+vectormulti_context0.3475000.0000000.8000000.296911
    1entity search+vectorreasoning0.5303270.0000000.9000000.330579
    2entity search+vectorsimple0.3833330.0000001.0000000.433333
    3fulltextmulti_context0.7382640.5454550.9285710.171521
    4fulltextreasoning0.9064940.7142861.0000000.133204
    5fulltextsimple0.7514880.6250000.8888890.110986
    6global search+vector+fulltextmulti_context0.4938390.1428570.8000000.235594
    7global search+vector+fulltextreasoning0.5370850.3000000.6875000.168360
    8global search+vector+fulltextsimple0.1944440.0000000.3333330.173472
    9graph+vectormulti_context0.8103110.3750001.0000000.233402
    10graph+vectorreasoning0.9066670.5333331.0000000.208700
    11graph+vectorsimple0.7460320.4285711.0000000.297804
    12graph+vector+fulltextmulti_context0.8440240.6363641.0000000.147066
    13graph+vector+fulltextreasoning0.9200000.6000001.0000000.178885
    14graph+vector+fulltextsimple0.8928570.5714291.0000000.214286
    15vectormulti_context0.7261150.5000001.0000000.203947
    16vectorreasoning0.9066670.7000001.0000000.136219
    17vectorsimple0.7083330.3333331.0000000.276385
    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity mean min \\\n", + "0 entity search+vector multi_context 0.347500 0.000000 \n", + "1 entity search+vector reasoning 0.530327 0.000000 \n", + "2 entity search+vector simple 0.383333 0.000000 \n", + "3 fulltext multi_context 0.738264 0.545455 \n", + "4 fulltext reasoning 0.906494 0.714286 \n", + "5 fulltext simple 0.751488 0.625000 \n", + "6 global search+vector+fulltext multi_context 0.493839 0.142857 \n", + "7 global search+vector+fulltext reasoning 0.537085 0.300000 \n", + "8 global search+vector+fulltext simple 0.194444 0.000000 \n", + "9 graph+vector multi_context 0.810311 0.375000 \n", + "10 graph+vector reasoning 0.906667 0.533333 \n", + "11 graph+vector simple 0.746032 0.428571 \n", + "12 graph+vector+fulltext multi_context 0.844024 0.636364 \n", + "13 graph+vector+fulltext reasoning 0.920000 0.600000 \n", + "14 graph+vector+fulltext simple 0.892857 0.571429 \n", + "15 vector multi_context 0.726115 0.500000 \n", + "16 vector reasoning 0.906667 0.700000 \n", + "17 vector simple 0.708333 0.333333 \n", + "\n", + " max std \n", + "0 0.800000 0.296911 \n", + "1 0.900000 0.330579 \n", + "2 1.000000 0.433333 \n", + "3 0.928571 0.171521 \n", + "4 1.000000 0.133204 \n", + "5 0.888889 0.110986 \n", + "6 0.800000 0.235594 \n", + "7 0.687500 0.168360 \n", + "8 0.333333 0.173472 \n", + "9 1.000000 0.233402 \n", + "10 1.000000 0.208700 \n", + "11 1.000000 0.297804 \n", + "12 1.000000 0.147066 \n", + "13 1.000000 0.178885 \n", + "14 1.000000 0.214286 \n", + "15 1.000000 0.203947 \n", + "16 1.000000 0.136219 \n", + "17 1.000000 0.276385 " + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "faithfulness_df = eval_df.groupby(['mode', 'question_complexity']).agg({\n", + " 'faithfulness': ['mean', 'min', 'max', 'std']\n", + "}).reset_index()\n", + "faithfulness_df.columns = ['mode', 'question_complexity', 'mean', 'min', 'max', 'std']\n", + "faithfulness_df" + ] + 
}, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "metrics = ['mean', 'std']\n", + "x = np.arange(len(faithfulness_df['mode'])) # Label locations\n", + "width = 0.2 # Bar width\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "for i, metric in enumerate(metrics):\n", + " ax.bar(x + i * width, faithfulness_df[metric], width, label=metric)\n", + "ax.set_xticks(x + width * 1.5)\n", + "ax.set_xticklabels(faithfulness_df['mode'] + ' (' + faithfulness_df['question_complexity'] + ')', rotation=45, ha='right')\n", + "ax.set_xlabel('Modes (Question Complexity)')\n", + "ax.set_ylabel('Values')\n", + "ax.set_title('Grouped Bar Chart of Faithfulness Metrics by Mode and Question Complexity')\n", + "ax.legend(title=\"Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Surprisingly, the faithfulness score is lower in the \"entity search+vector\" and \"global search+vector+fulltext\" modes.\n", + "2. We can observe that the faithfulness score increases on reasoning questions for most of the modes.\n", + "3. All modes struggle on simple questions." + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    questionmodeanswerfaithfulness
    10How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry?global search+vector+fulltextThe integrated focused cost leadership strategy helps competitors like Barnes and Noble compete with Amazon in the book industry by allowing them to offer a unique value proposition. By combining cost leadership with differentiation, Barnes and Noble can focus on specific market segments or niches where they can provide distinct products or services at competitive prices. This strategy enables them to target specific customer needs effectively while maintaining cost efficiency, thereby creating a competitive advantage against Amazon's broader market approach.0.250000
    16What is the competitive strategy of Barnes and Noble in the e-commerce industry?global search+vector+fulltextBarnes and Noble in the e-commerce industry employs an integrated focused cost leadership/differentiation strategy. This strategy combines elements of cost leadership and differentiation to set itself apart from competitors like Amazon and eBay. By focusing on cost efficiency while also offering unique and differentiated products or services, Barnes and Noble aims to carve out a competitive position in the market.0.333333
    22Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements?global search+vector+fulltextOnline retailers like Amazon should be cautious about disclosing online sales taxes in their advertisements to maintain transparency and compliance with regulations. By clearly stating the inclusion of sales taxes, they build trust with customers and avoid potential legal issues related to false advertising or misleading practices. Additionally, disclosing online sales taxes helps prevent any misunderstandings or surprises for customers during the checkout process, leading to a smoother shopping experience and reducing the risk of customer dissatisfaction or complaints.0.000000
    28How threatening are substitute products to Amazon's patented tech and customer service?global search+vector+fulltextSubstitute products can pose a moderate threat to Amazon's patented technology and customer service. While Amazon's patents provide a level of protection for its innovations, substitute products could still impact its market share and competitive position. Amazon's strong customer service reputation may mitigate some of this threat by fostering customer loyalty. However, continuous innovation and strategic differentiation are crucial for Amazon to stay ahead of potential substitutes in the rapidly evolving e-commerce landscape.0.300000
    57What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's?global search+vector+fulltextAmazon's book competitors employ various strategies to compete for market share. eBay utilizes both cost leadership and differentiation strategies, aiming to offer competitive pricing and a unique value proposition to customers. Barnes and Noble integrates a focused cost leadership/differentiation strategy, focusing on specific market segments while offering differentiated products and services. Wal-Mart, as the world's largest retail chain, primarily focuses on a cost leadership approach, leveraging its scale to provide competitive prices.\\n\\nIn comparison, Amazon employs a mix of strategies, heavily investing in technologies like video content, cloud services, and its Kindle product. It leverages technologies such as Elastic Block Storage (EBS) and 1-Click Ordering to enhance operations and customer experience. Amazon's strategic focus on innovation, technology, and global market expansion sets it apart from its competitors, allowing it to maintain a strong position in the e-commerce landscape.0.454545
    69What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books?global search+vector+fulltextAmazon's book competitors employ various strategies to attract price-sensitive customers and expand their market share in the realm of online shopping. eBay, Barnes and Noble, and Wal-Mart utilize different approaches to cater to this segment:\\n\\n1. **eBay**: Utilizes both cost leadership and differentiation strategies to appeal to price-sensitive customers. It offers a wide range of products, including books, at competitive prices while also providing a unique platform for auctions and second-hand items.\\n\\n2. **Barnes and Noble**: Integrates a focused cost leadership/differentiation strategy to stand out in the market. By offering a mix of affordable books along with unique and high-quality selections, they target price-sensitive customers seeking value and variety.\\n\\n3. **Wal-Mart**: Focuses on a cost leadership approach, leveraging its vast retail chain to provide competitive pricing on books and other products. By emphasizing affordability and a wide selection, Wal-Mart aims to attract price-sensitive customers looking for budget-friendly options.\\n\\nThese competitors aim to leverage online shopping platforms to reach a broader audience of price-sensitive customers while offering unique and affordable book selections to enhance their market share in the competitive e-commerce landscape.0.142857
    \n", + "
    " + ], + "text/plain": [ + " question \\\n", + "10 How does the integrated focused cost leadership strategy help competitors compete with Amazon in the book industry? \n", + "16 What is the competitive strategy of Barnes and Noble in the e-commerce industry? \n", + "22 Why should online retailers like Amazon be careful about disclosing online sales taxes in their advertisements? \n", + "28 How threatening are substitute products to Amazon's patented tech and customer service? \n", + "57 What strategies are Amazon's book competitors using for market share and how does it compare to Amazon's? \n", + "69 What strategies do Amazon's book competitors use to attract price-sensitive customers and grow market share, leveraging online shopping and unique, cheap books? \n", + "\n", + " mode \\\n", + "10 global search+vector+fulltext \n", + "16 global search+vector+fulltext \n", + "22 global search+vector+fulltext \n", + "28 global search+vector+fulltext \n", + "57 global search+vector+fulltext \n", + "69 global search+vector+fulltext \n", + "\n", + " answer \\\n", + "10 The integrated focused cost leadership strategy helps competitors like Barnes and Noble compete with Amazon in the book industry by allowing them to offer a unique value proposition. By combining cost leadership with differentiation, Barnes and Noble can focus on specific market segments or niches where they can provide distinct products or services at competitive prices. This strategy enables them to target specific customer needs effectively while maintaining cost efficiency, thereby creating a competitive advantage against Amazon's broader market approach. \n", + "16 Barnes and Noble in the e-commerce industry employs an integrated focused cost leadership/differentiation strategy. This strategy combines elements of cost leadership and differentiation to set itself apart from competitors like Amazon and eBay. 
By focusing on cost efficiency while also offering unique and differentiated products or services, Barnes and Noble aims to carve out a competitive position in the market. \n", + "22 Online retailers like Amazon should be cautious about disclosing online sales taxes in their advertisements to maintain transparency and compliance with regulations. By clearly stating the inclusion of sales taxes, they build trust with customers and avoid potential legal issues related to false advertising or misleading practices. Additionally, disclosing online sales taxes helps prevent any misunderstandings or surprises for customers during the checkout process, leading to a smoother shopping experience and reducing the risk of customer dissatisfaction or complaints. \n", + "28 Substitute products can pose a moderate threat to Amazon's patented technology and customer service. While Amazon's patents provide a level of protection for its innovations, substitute products could still impact its market share and competitive position. Amazon's strong customer service reputation may mitigate some of this threat by fostering customer loyalty. However, continuous innovation and strategic differentiation are crucial for Amazon to stay ahead of potential substitutes in the rapidly evolving e-commerce landscape. \n", + "57 Amazon's book competitors employ various strategies to compete for market share. eBay utilizes both cost leadership and differentiation strategies, aiming to offer competitive pricing and a unique value proposition to customers. Barnes and Noble integrates a focused cost leadership/differentiation strategy, focusing on specific market segments while offering differentiated products and services. 
Wal-Mart, as the world's largest retail chain, primarily focuses on a cost leadership approach, leveraging its scale to provide competitive prices.\\n\\nIn comparison, Amazon employs a mix of strategies, heavily investing in technologies like video content, cloud services, and its Kindle product. It leverages technologies such as Elastic Block Storage (EBS) and 1-Click Ordering to enhance operations and customer experience. Amazon's strategic focus on innovation, technology, and global market expansion sets it apart from its competitors, allowing it to maintain a strong position in the e-commerce landscape. \n", + "69 Amazon's book competitors employ various strategies to attract price-sensitive customers and expand their market share in the realm of online shopping. eBay, Barnes and Noble, and Wal-Mart utilize different approaches to cater to this segment:\\n\\n1. **eBay**: Utilizes both cost leadership and differentiation strategies to appeal to price-sensitive customers. It offers a wide range of products, including books, at competitive prices while also providing a unique platform for auctions and second-hand items.\\n\\n2. **Barnes and Noble**: Integrates a focused cost leadership/differentiation strategy to stand out in the market. By offering a mix of affordable books along with unique and high-quality selections, they target price-sensitive customers seeking value and variety.\\n\\n3. **Wal-Mart**: Focuses on a cost leadership approach, leveraging its vast retail chain to provide competitive pricing on books and other products. By emphasizing affordability and a wide selection, Wal-Mart aims to attract price-sensitive customers looking for budget-friendly options.\\n\\nThese competitors aim to leverage online shopping platforms to reach a broader audience of price-sensitive customers while offering unique and affordable book selections to enhance their market share in the competitive e-commerce landscape. 
\n", + "\n", + " faithfulness \n", + "10 0.250000 \n", + "16 0.333333 \n", + "22 0.000000 \n", + "28 0.300000 \n", + "57 0.454545 \n", + "69 0.142857 " + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's check the questions and answers where global search scored low on faithfulness\n", + "pd.set_option(\"display.max_colwidth\", None)\n", + "eval_df[[\"question\",\"mode\",\"answer\",\"faithfulness\"]][(eval_df['mode']==\"global search+vector+fulltext\") & (eval_df['faithfulness'] <0.5)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Answer Relevancy\n", + "\n", + "This measures how relevant the generated answer is to the given question." + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitymeanminmaxstd
    0entity search+vectormulti_context0.9442880.8978610.9700960.028123
    1entity search+vectorreasoning0.9672520.9534660.9854860.011735
    2entity search+vectorsimple0.9767750.9507041.0000000.020596
    3fulltextmulti_context0.9398130.9308140.9553020.009615
    4fulltextreasoning0.9549650.9348950.9742460.014387
    5fulltextsimple0.9692420.9240261.0000000.032231
    6global search+vector+fulltextmulti_context0.9340880.9003930.9677280.029599
    7global search+vector+fulltextreasoning0.9537060.9332910.9667640.014332
    8global search+vector+fulltextsimple0.9753290.9732300.9788110.003037
    9graph+vectormulti_context0.9302910.8978610.9579430.026410
    10graph+vectorreasoning0.7647480.0000000.9743050.427745
    11graph+vectorsimple0.9446750.9144510.9894750.033573
    12graph+vector+fulltextmulti_context0.9428060.9227840.9553020.013306
    13graph+vector+fulltextreasoning0.9352750.8926180.9620120.026796
    14graph+vector+fulltextsimple0.9723420.9266900.9999980.032359
    15vectormulti_context0.9352970.9149590.9553380.013688
    16vectorreasoning0.9567080.9348950.9855040.018364
    17vectorsimple0.9686940.9240690.9977440.031505
    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity mean min \\\n", + "0 entity search+vector multi_context 0.944288 0.897861 \n", + "1 entity search+vector reasoning 0.967252 0.953466 \n", + "2 entity search+vector simple 0.976775 0.950704 \n", + "3 fulltext multi_context 0.939813 0.930814 \n", + "4 fulltext reasoning 0.954965 0.934895 \n", + "5 fulltext simple 0.969242 0.924026 \n", + "6 global search+vector+fulltext multi_context 0.934088 0.900393 \n", + "7 global search+vector+fulltext reasoning 0.953706 0.933291 \n", + "8 global search+vector+fulltext simple 0.975329 0.973230 \n", + "9 graph+vector multi_context 0.930291 0.897861 \n", + "10 graph+vector reasoning 0.764748 0.000000 \n", + "11 graph+vector simple 0.944675 0.914451 \n", + "12 graph+vector+fulltext multi_context 0.942806 0.922784 \n", + "13 graph+vector+fulltext reasoning 0.935275 0.892618 \n", + "14 graph+vector+fulltext simple 0.972342 0.926690 \n", + "15 vector multi_context 0.935297 0.914959 \n", + "16 vector reasoning 0.956708 0.934895 \n", + "17 vector simple 0.968694 0.924069 \n", + "\n", + " max std \n", + "0 0.970096 0.028123 \n", + "1 0.985486 0.011735 \n", + "2 1.000000 0.020596 \n", + "3 0.955302 0.009615 \n", + "4 0.974246 0.014387 \n", + "5 1.000000 0.032231 \n", + "6 0.967728 0.029599 \n", + "7 0.966764 0.014332 \n", + "8 0.978811 0.003037 \n", + "9 0.957943 0.026410 \n", + "10 0.974305 0.427745 \n", + "11 0.989475 0.033573 \n", + "12 0.955302 0.013306 \n", + "13 0.962012 0.026796 \n", + "14 0.999998 0.032359 \n", + "15 0.955338 0.013688 \n", + "16 0.985504 0.018364 \n", + "17 0.997744 0.031505 " + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer_rel_df = eval_df.groupby(['mode', 'question_complexity']).agg({\n", + " 'answer_relevancy': ['mean', 'min', 'max', 'std']\n", + "}).reset_index()\n", + "answer_rel_df.columns = ['mode', 'question_complexity', 'mean', 'min', 'max', 'std']\n", + "answer_rel_df" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "metrics = ['mean', 'std']\n", + "x = np.arange(len(answer_rel_df['mode'])) # Label locations\n", + "width = 0.2 # Bar width\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "for i, metric in enumerate(metrics):\n", + " ax.bar(x + i * width, answer_rel_df[metric], width, label=metric)\n", + "ax.set_xticks(x + width * 1.5)\n", + "ax.set_xticklabels(answer_rel_df['mode'] + ' (' + answer_rel_df['question_complexity'] + ')', rotation=45, ha='right')\n", + "ax.set_xlabel('Modes (Question Complexity)')\n", + "ax.set_ylabel('Values')\n", + "ax.set_title('Grouped Bar Chart of Answer Relevancy Metrics by Mode and Question Complexity')\n", + "ax.legend(title=\"Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1.All modes score fairly for answer relevancy except \" graph + vector \" mode for reasoning questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. Answer Correctness\n", + "\n", + "Measures answer correctness compared to ground truth as a combination of factuality and semantic similarity." + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitymeanminmaxstd
    0entity search+vectormulti_context0.7424960.5234710.9922690.170004
    1entity search+vectorreasoning0.8035360.5657110.9436290.147004
    2entity search+vectorsimple0.5377660.1726750.7334100.251963
    3fulltextmulti_context0.8033990.4373950.9955870.208676
    4fulltextreasoning0.8533490.6190320.9967040.187401
    5fulltextsimple0.6723180.1753730.9401640.339199
    6global search+vector+fulltextmulti_context0.7355390.6095250.9091840.123181
    7global search+vector+fulltextreasoning0.7278430.4883640.8461430.162029
    8global search+vector+fulltextsimple0.3945940.1721250.6658700.250464
    9graph+vectormulti_context0.7421800.4913980.9181700.188076
    10graph+vectorreasoning0.8584120.6700620.9965560.137474
    11graph+vectorsimple0.5594330.1749660.8452230.300916
    12graph+vector+fulltextmulti_context0.7367840.4372160.9902460.237939
    13graph+vector+fulltextreasoning0.7869990.4557120.9466830.190876
    14graph+vector+fulltextsimple0.6436410.1750820.9343100.339174
    15vectormulti_context0.8133150.5442150.9912020.156961
    16vectorreasoning0.6771240.4754650.8232990.126854
    17vectorsimple0.5845350.1753240.8178120.292301
    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity mean min \\\n", + "0 entity search+vector multi_context 0.742496 0.523471 \n", + "1 entity search+vector reasoning 0.803536 0.565711 \n", + "2 entity search+vector simple 0.537766 0.172675 \n", + "3 fulltext multi_context 0.803399 0.437395 \n", + "4 fulltext reasoning 0.853349 0.619032 \n", + "5 fulltext simple 0.672318 0.175373 \n", + "6 global search+vector+fulltext multi_context 0.735539 0.609525 \n", + "7 global search+vector+fulltext reasoning 0.727843 0.488364 \n", + "8 global search+vector+fulltext simple 0.394594 0.172125 \n", + "9 graph+vector multi_context 0.742180 0.491398 \n", + "10 graph+vector reasoning 0.858412 0.670062 \n", + "11 graph+vector simple 0.559433 0.174966 \n", + "12 graph+vector+fulltext multi_context 0.736784 0.437216 \n", + "13 graph+vector+fulltext reasoning 0.786999 0.455712 \n", + "14 graph+vector+fulltext simple 0.643641 0.175082 \n", + "15 vector multi_context 0.813315 0.544215 \n", + "16 vector reasoning 0.677124 0.475465 \n", + "17 vector simple 0.584535 0.175324 \n", + "\n", + " max std \n", + "0 0.992269 0.170004 \n", + "1 0.943629 0.147004 \n", + "2 0.733410 0.251963 \n", + "3 0.995587 0.208676 \n", + "4 0.996704 0.187401 \n", + "5 0.940164 0.339199 \n", + "6 0.909184 0.123181 \n", + "7 0.846143 0.162029 \n", + "8 0.665870 0.250464 \n", + "9 0.918170 0.188076 \n", + "10 0.996556 0.137474 \n", + "11 0.845223 0.300916 \n", + "12 0.990246 0.237939 \n", + "13 0.946683 0.190876 \n", + "14 0.934310 0.339174 \n", + "15 0.991202 0.156961 \n", + "16 0.823299 0.126854 \n", + "17 0.817812 0.292301 " + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer_cor_df = eval_df.groupby(['mode', 'question_complexity']).agg({\n", + " 'answer_correctness': ['mean', 'min', 'max', 'std']\n", + "}).reset_index()\n", + "answer_cor_df.columns = ['mode', 'question_complexity', 'mean', 'min', 'max', 'std']\n", + "answer_cor_df" + ] + 
}, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "metrics = ['mean', 'std']\n", + "x = np.arange(len(answer_cor_df['mode'])) # Label locations\n", + "width = 0.2 # Bar width\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "for i, metric in enumerate(metrics):\n", + " ax.bar(x + i * width, answer_cor_df[metric], width, label=metric)\n", + "ax.set_xticks(x + width * 1.5)\n", + "ax.set_xticklabels(answer_cor_df['mode'] + ' (' + answer_cor_df['question_complexity'] + ')', rotation=45, ha='right')\n", + "ax.set_xlabel('Modes (Question Complexity)')\n", + "ax.set_ylabel('Values')\n", + "ax.set_title('Grouped Bar Chart of Answer Correctness Metrics by Mode and Question Complexity')\n", + "ax.legend(title=\"Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. All modes struggle on simple questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. Context Precision\n", + "\n", + "Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitymeanminmaxstd
    0entity search+vectormulti_context0.8000000.01.00.447214
    1entity search+vectorreasoning0.8000000.01.00.447214
    2entity search+vectorsimple0.5000000.01.00.577350
    3fulltextmulti_context0.8333330.01.00.408248
    4fulltextreasoning1.0000001.01.00.000000
    5fulltextsimple1.0000001.01.00.000000
    6global search+vector+fulltextmulti_context0.8000000.01.00.447214
    7global search+vector+fulltextreasoning0.5000000.01.00.577350
    8global search+vector+fulltextsimple0.6666670.01.00.577350
    9graph+vectormulti_context0.8333330.01.00.408248
    10graph+vectorreasoning1.0000001.01.00.000000
    11graph+vectorsimple1.0000001.01.00.000000
    12graph+vector+fulltextmulti_context0.8333330.01.00.408248
    13graph+vector+fulltextreasoning1.0000001.01.00.000000
    14graph+vector+fulltextsimple1.0000001.01.00.000000
    15vectormulti_context0.8333330.01.00.408248
    16vectorreasoning1.0000001.01.00.000000
    17vectorsimple0.7500000.01.00.500000
    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity mean min max \\\n", + "0 entity search+vector multi_context 0.800000 0.0 1.0 \n", + "1 entity search+vector reasoning 0.800000 0.0 1.0 \n", + "2 entity search+vector simple 0.500000 0.0 1.0 \n", + "3 fulltext multi_context 0.833333 0.0 1.0 \n", + "4 fulltext reasoning 1.000000 1.0 1.0 \n", + "5 fulltext simple 1.000000 1.0 1.0 \n", + "6 global search+vector+fulltext multi_context 0.800000 0.0 1.0 \n", + "7 global search+vector+fulltext reasoning 0.500000 0.0 1.0 \n", + "8 global search+vector+fulltext simple 0.666667 0.0 1.0 \n", + "9 graph+vector multi_context 0.833333 0.0 1.0 \n", + "10 graph+vector reasoning 1.000000 1.0 1.0 \n", + "11 graph+vector simple 1.000000 1.0 1.0 \n", + "12 graph+vector+fulltext multi_context 0.833333 0.0 1.0 \n", + "13 graph+vector+fulltext reasoning 1.000000 1.0 1.0 \n", + "14 graph+vector+fulltext simple 1.000000 1.0 1.0 \n", + "15 vector multi_context 0.833333 0.0 1.0 \n", + "16 vector reasoning 1.000000 1.0 1.0 \n", + "17 vector simple 0.750000 0.0 1.0 \n", + "\n", + " std \n", + "0 0.447214 \n", + "1 0.447214 \n", + "2 0.577350 \n", + "3 0.408248 \n", + "4 0.000000 \n", + "5 0.000000 \n", + "6 0.447214 \n", + "7 0.577350 \n", + "8 0.577350 \n", + "9 0.408248 \n", + "10 0.000000 \n", + "11 0.000000 \n", + "12 0.408248 \n", + "13 0.000000 \n", + "14 0.000000 \n", + "15 0.408248 \n", + "16 0.000000 \n", + "17 0.500000 " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context_precision_df = eval_df.groupby(['mode', 'question_complexity']).agg({\n", + " 'context_precision': ['mean', 'min', 'max', 'std']\n", + "}).reset_index()\n", + "context_precision_df.columns = ['mode', 'question_complexity', 'mean', 'min', 'max', 'std']\n", + "context_precision_df" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "metrics = ['mean', 'std']\n", + "x = np.arange(len(context_precision_df['mode'])) # Label locations\n", + "width = 0.2 # Bar width\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "for i, metric in enumerate(metrics):\n", + " ax.bar(x + i * width, context_precision_df[metric], width, label=metric)\n", + "ax.set_xticks(x + width * 1.5)\n", + "ax.set_xticklabels(context_precision_df['mode'] + ' (' + context_precision_df['question_complexity'] + ')', rotation=45, ha='right')\n", + "ax.set_xlabel('Modes (Question Complexity)')\n", + "ax.set_ylabel('Values')\n", + "ax.set_title('Grouped Bar Chart of Context Precision Metrics by Mode and Question Complexity')\n", + "ax.legend(title=\"Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. All modes except entity search + vector score fairly for reasoning questions .\n", + "2. As the metrics calculate context precision we can see for multi context question the score varies for all modes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5. Context Recall\n", + "\n", + "Context recall measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed using question, ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance." + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    modequestion_complexitymeanminmaxstd
    0entity search+vectormulti_context0.5619050.1428571.00.310712
    1entity search+vectorreasoning0.2500000.0000001.00.433013
    2entity search+vectorsimple0.5000000.0000001.00.577350
    3fulltextmulti_context0.9523810.7142861.00.116642
    4fulltextreasoning1.0000001.0000001.00.000000
    5fulltextsimple1.0000001.0000001.00.000000
    6global search+vector+fulltextmulti_context0.5119050.1428571.00.341731
    7global search+vector+fulltextreasoning0.1250000.0000000.50.250000
    8global search+vector+fulltextsimple0.6666670.0000001.00.577350
    9graph+vectormulti_context0.9523810.7142861.00.116642
    10graph+vectorreasoning1.0000001.0000001.00.000000
    11graph+vectorsimple1.0000001.0000001.00.000000
    12graph+vector+fulltextmulti_context0.9523810.7142861.00.116642
    13graph+vector+fulltextreasoning1.0000001.0000001.00.000000
    14graph+vector+fulltextsimple1.0000001.0000001.00.000000
    15vectormulti_context0.9107140.7142861.00.138781
    16vectorreasoning1.0000001.0000001.00.000000
    17vectorsimple1.0000001.0000001.00.000000
    \n", + "
    " + ], + "text/plain": [ + " mode question_complexity mean min \\\n", + "0 entity search+vector multi_context 0.561905 0.142857 \n", + "1 entity search+vector reasoning 0.250000 0.000000 \n", + "2 entity search+vector simple 0.500000 0.000000 \n", + "3 fulltext multi_context 0.952381 0.714286 \n", + "4 fulltext reasoning 1.000000 1.000000 \n", + "5 fulltext simple 1.000000 1.000000 \n", + "6 global search+vector+fulltext multi_context 0.511905 0.142857 \n", + "7 global search+vector+fulltext reasoning 0.125000 0.000000 \n", + "8 global search+vector+fulltext simple 0.666667 0.000000 \n", + "9 graph+vector multi_context 0.952381 0.714286 \n", + "10 graph+vector reasoning 1.000000 1.000000 \n", + "11 graph+vector simple 1.000000 1.000000 \n", + "12 graph+vector+fulltext multi_context 0.952381 0.714286 \n", + "13 graph+vector+fulltext reasoning 1.000000 1.000000 \n", + "14 graph+vector+fulltext simple 1.000000 1.000000 \n", + "15 vector multi_context 0.910714 0.714286 \n", + "16 vector reasoning 1.000000 1.000000 \n", + "17 vector simple 1.000000 1.000000 \n", + "\n", + " max std \n", + "0 1.0 0.310712 \n", + "1 1.0 0.433013 \n", + "2 1.0 0.577350 \n", + "3 1.0 0.116642 \n", + "4 1.0 0.000000 \n", + "5 1.0 0.000000 \n", + "6 1.0 0.341731 \n", + "7 0.5 0.250000 \n", + "8 1.0 0.577350 \n", + "9 1.0 0.116642 \n", + "10 1.0 0.000000 \n", + "11 1.0 0.000000 \n", + "12 1.0 0.116642 \n", + "13 1.0 0.000000 \n", + "14 1.0 0.000000 \n", + "15 1.0 0.138781 \n", + "16 1.0 0.000000 \n", + "17 1.0 0.000000 " + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context_recall_df = eval_df.groupby(['mode', 'question_complexity']).agg({\n", + " 'context_recall': ['mean', 'min', 'max', 'std']\n", + "}).reset_index()\n", + "context_recall_df.columns = ['mode', 'question_complexity', 'mean', 'min', 'max', 'std']\n", + "context_recall_df" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + 
"outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "metrics = ['mean', 'std']\n", + "x = np.arange(len(context_recall_df['mode'])) # Label locations\n", + "width = 0.2 # Bar width\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "for i, metric in enumerate(metrics):\n", + " ax.bar(x + i * width, context_recall_df[metric], width, label=metric)\n", + "ax.set_xticks(x + width * 1.5)\n", + "ax.set_xticklabels(context_recall_df['mode'] + ' (' + context_recall_df['question_complexity'] + ')', rotation=45, ha='right')\n", + "ax.set_xlabel('Modes (Question Complexity)')\n", + "ax.set_ylabel('Values')\n", + "ax.set_title('Grouped Bar Chart of Context Recall Metrics by Mode and Question Complexity')\n", + "ax.legend(title=\"Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. \"entity search + vector\" and \"global search + vector + fulltext \" modes are performing poor for all question complexities. other modes have scored perfect score for context recall." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "myenv312", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b663ff43572dabb154cee4ec38ed6bf6ba435ab5 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 30 Sep 2024 04:59:06 +0000 Subject: [PATCH 145/292] deleted unused file --- frontend/src/HOC/SettingModalHOC.tsx | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 frontend/src/HOC/SettingModalHOC.tsx diff --git a/frontend/src/HOC/SettingModalHOC.tsx b/frontend/src/HOC/SettingModalHOC.tsx deleted file mode 100644 index 24d84bbff..000000000 --- a/frontend/src/HOC/SettingModalHOC.tsx +++ /dev/null @@ -1,28 +0,0 @@ -// import React from 'react'; -// import { SettingsModalProps } from '../types'; -// import SettingsModal from '../components/Popups/Settings/SettingModal'; - -// const SettingModalHOC: React.FC = ({ -// openTextSchema, -// open, -// onClose, -// isSchema, -// settingView, -// setIsSchema, -// onContinue, -// onClear, -// }) => { -// return ( -// -// ); -// }; -// export default SettingModalHOC; From 7c653443fcddd50ce93979428a0491b0a4c40fd4 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 30 Sep 2024 05:32:28 +0000 Subject: [PATCH 146/292] code optimization using memo --- .../src/components/ChatBot/ChatInfoModal.tsx | 8 ++- .../ChatBot/ExpandedChatButtonContainer.tsx | 4 +- frontend/src/components/Dropdown.tsx | 4 +- frontend/src/components/Layout/Header.tsx | 14 +++-- frontend/src/components/Layout/SideNav.tsx | 55 +++++++------------ .../Popups/DeletePopUp/DeletePopUp.tsx | 5 +- 
.../LargeFilePopUp/ConfirmationDialog.tsx | 5 +- .../Popups/RetryConfirmation/Index.tsx | 4 +- frontend/src/components/QuickStarter.tsx | 12 +--- frontend/src/components/UI/TipWrapper.tsx | 20 +++++++ frontend/src/context/UserMessages.tsx | 7 +-- frontend/src/types.ts | 1 + frontend/src/utils/Constants.ts | 6 +- 13 files changed, 75 insertions(+), 70 deletions(-) create mode 100644 frontend/src/components/UI/TipWrapper.tsx diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index d4fb89af9..894833884 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -100,11 +100,13 @@ const ChatInfoModal: React.FC = ({ if (response.data.status === 'Failure') { throw new Error(response.data.error); } - const nodesData = response?.data?.data?.nodes.map((f: Node) => f) + const nodesData = response?.data?.data?.nodes + .map((f: Node) => f) .filter((node: ExtendedNode) => node.labels.length === 1); const nodeIds = new Set(nodesData.map((node: any) => node.element_id)); - const relationshipsData = response?.data?.data?.relationships.map((f: Relationship) => f) - .filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id));; + const relationshipsData = response?.data?.data?.relationships + .map((f: Relationship) => f) + .filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); const communitiesData = response?.data?.data?.community_data; const chunksData = response?.data?.data?.chunk_data; diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx index 465687c4b..44bddc805 100644 --- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -4,7 +4,7 @@ import { Box, IconButton } from '@neo4j-ndl/react'; 
import { IconProps } from '../../types'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; -import { useState } from 'react'; +import { memo, useState } from 'react'; import { RiChatSettingsLine } from 'react-icons/ri'; const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, deleteOnClick, messages }) => { @@ -51,4 +51,4 @@ const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, delete ); }; -export default ExpandedChatButtonContainer; +export default memo(ExpandedChatButtonContainer); diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index ba949aec0..cdf658824 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,6 +1,6 @@ import { Dropdown, Tip } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; -import { useMemo, useReducer } from 'react'; +import { memo, useMemo, useReducer } from 'react'; import { capitalize } from '../utils/Utils'; const DropdownComponent: React.FC = ({ @@ -68,4 +68,4 @@ const DropdownComponent: React.FC = ({ ); }; -export default DropdownComponent; +export default memo(DropdownComponent); diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index acf62eefa..752c31660 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -7,12 +7,15 @@ import { InformationCircleIconOutline, } from '@neo4j-ndl/react/icons'; import { Typography } from '@neo4j-ndl/react'; -import { useCallback, useEffect } from 'react'; +import { memo, useCallback, useContext, useEffect } from 'react'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; import { useFileContext } from '../../context/UsersFiles'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; + +function Header() { + const { colorMode, 
toggleColorMode } = useContext(ThemeWrapperContext); -export default function Header({ themeMode, toggleTheme }: { themeMode: string; toggleTheme: () => void }) { const handleURLClick = useCallback((url: string) => { window.open(url, '_blank'); }, []); @@ -38,7 +41,7 @@ export default function Header({ themeMode, toggleTheme }: { themeMode: string;
    Neo4j Logo @@ -75,10 +78,10 @@ export default function Header({ themeMode, toggleTheme }: { themeMode: string; text={tooltips.theme} clean size='large' - onClick={toggleTheme} + onClick={toggleColorMode} placement='left' > - {themeMode === 'dark' ? ( + {colorMode === 'dark' ? ( @@ -95,3 +98,4 @@ export default function Header({ themeMode, toggleTheme }: { themeMode: string;
    ); } +export default memo(Header); diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 526150db4..e07979405 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -23,6 +23,7 @@ import S3Component from '../DataSources/AWS/S3Bucket'; import WebButton from '../DataSources/Web/WebButton'; import DropZoneForSmallLayouts from '../DataSources/Local/DropZoneForSmallLayouts'; import { useCredentials } from '../../context/UserCredentials'; +import TipWrapper from '../UI/TipWrapper'; const SideNav: React.FC = ({ position, @@ -101,12 +102,9 @@ const SideNav: React.FC = ({ - - - - {tooltips.sources} - + + + } /> )} @@ -115,12 +113,9 @@ const SideNav: React.FC = ({ - - - - {tooltips.chat} - + + + } /> )} @@ -128,48 +123,36 @@ const SideNav: React.FC = ({ {!largedesktops && position === 'left' && !isReadOnlyUser && ( - - - - Local files - + + + } /> )} {!largedesktops && APP_SOURCES.includes('gcs') && position === 'left' && !isReadOnlyUser && ( - - - - GCS Files - + + + } /> )} {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && !isReadOnlyUser && ( - - - - S3 Files - + + + } /> )} {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && !isReadOnlyUser && ( - - - - Web Sources - + + + } > )} diff --git a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx index 1b1bd58c8..cbe69c492 100644 --- a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx +++ b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx @@ -1,6 +1,6 @@ import { Button, Checkbox, Dialog } from '@neo4j-ndl/react'; -import { useState } from 'react'; -export default function DeletePopUp({ +import { memo, useState } from 'react'; +function DeletePopUp({ open, no_of_files, deleteHandler, @@ -55,3 +55,4 @@ export default function DeletePopUp({ ); } +export default memo(DeletePopUp); diff --git 
a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx index 8d04467ff..08991c44b 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx @@ -1,10 +1,10 @@ import { Button, Dialog, Typography } from '@neo4j-ndl/react'; import { CustomFile } from '../../../types'; import LargeFilesAlert from './LargeFilesAlert'; -import { useEffect, useState } from 'react'; +import { memo, useEffect, useState } from 'react'; import { useFileContext } from '../../../context/UsersFiles'; -export default function ConfirmationDialog({ +function ConfirmationDialog({ largeFiles, open, onClose, @@ -111,3 +111,4 @@ export default function ConfirmationDialog({ ); } +export default memo(ConfirmationDialog); diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index d7b160e8e..a510352aa 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -4,8 +4,9 @@ import { useFileContext } from '../../../context/UsersFiles'; import { capitalize } from '../../../utils/Utils'; import { BannerAlertProps } from '../../../types'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; +import { memo } from 'react'; -export default function RetryConfirmationDialog({ +function RetryConfirmationDialog({ open, onClose, fileId, @@ -82,3 +83,4 @@ export default function RetryConfirmationDialog({ ); } +export default memo(RetryConfirmationDialog); \ No newline at end of file diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index db2cba7e7..1a4e169d2 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -1,6 +1,5 @@ import Header from './Layout/Header'; import React, { 
useState } from 'react'; -import { ThemeWrapperContext } from '../context/ThemeWrapper'; import PageLayout from './Layout/PageLayout'; import { FileContextProvider } from '../context/UsersFiles'; import UserCredentialsWrapper from '../context/UserCredentials'; @@ -8,16 +7,7 @@ import AlertContextWrapper from '../context/Alert'; import { MessageContextWrapper } from '../context/UserMessages'; const QuickStarter: React.FunctionComponent = () => { - const themeUtils = React.useContext(ThemeWrapperContext); - const [themeMode, setThemeMode] = useState(themeUtils.colorMode); const [showSettingsModal, setshowSettingsModal] = useState(false); - - const toggleColorMode = () => { - setThemeMode((prevThemeMode) => { - return prevThemeMode === 'light' ? 'dark' : 'light'; - }); - themeUtils.toggleColorMode(); - }; const openSettingsModal = () => { setshowSettingsModal(true); }; @@ -30,7 +20,7 @@ const QuickStarter: React.FunctionComponent = () => { -
    +
    + {children} + {tooltip} + + ); +} +export default TipWrapper; diff --git a/frontend/src/context/UserMessages.tsx b/frontend/src/context/UserMessages.tsx index 0e7b7abde..e586621f9 100644 --- a/frontend/src/context/UserMessages.tsx +++ b/frontend/src/context/UserMessages.tsx @@ -1,14 +1,11 @@ import { createContext, useState, useContext, FC } from 'react'; import { MessagesContextProviderProps, Messages, MessageContextType } from '../types'; -import chatbotmessages from '../assets/ChatbotMessages.json'; -import { getDateTime } from '../utils/Utils'; +import { getDefaultMessage } from '../utils/Constants'; const MessageContext = createContext(undefined); const MessageContextWrapper: FC = ({ children }) => { - const [messages, setMessages] = useState([ - { ...chatbotmessages.listMessages[1], datetime: getDateTime() }, - ]); + const [messages, setMessages] = useState(getDefaultMessage); const [clearHistoryData, setClearHistoryData] = useState(false); const value: MessageContextType = { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index f407ca350..5dc09f418 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -738,3 +738,4 @@ export interface FileContextType { postProcessingVal: boolean; setPostProcessingVal: Dispatch>; } +export declare type Side = 'top' | 'right' | 'bottom' | 'left'; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index daf3ef0f2..a9a41c0a2 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -1,6 +1,7 @@ import { NvlOptions } from '@neo4j-nvl/base'; import { GraphType, OptionType } from '../types'; -import { getDescriptionForChatMode } from './Utils'; +import { getDateTime, getDescriptionForChatMode } from './Utils'; +import chatbotmessages from '../assets/ChatbotMessages.json'; export const document = `+ [docs]`; @@ -309,3 +310,6 @@ export const connectionLabels = { greenStroke: 'green', redStroke: 'red', }; +export const getDefaultMessage = () => { + 
return [{ ...chatbotmessages.listMessages[1], datetime: getDateTime() }]; +}; From c69b70884b64f6f0cabcdb02dd9f692ad1bf40cd Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 30 Sep 2024 12:54:50 +0000 Subject: [PATCH 147/292] Added elapsed_time on each api and getiing time per_entity --- backend/score.py | 81 +++++++++++++++++++++++++++++++++++---------- backend/src/main.py | 6 ++-- 2 files changed, 68 insertions(+), 19 deletions(-) diff --git a/backend/score.py b/backend/score.py index a9e8e0b93..eba6c3a00 100644 --- a/backend/score.py +++ b/backend/score.py @@ -90,6 +90,7 @@ async def create_source_knowledge_graph_url( ): try: + start = time.time() if source_url is not None: source = source_url else: @@ -115,9 +116,12 @@ async def create_source_knowledge_graph_url( return create_api_response('Failed',message='source_type is other than accepted source') message = f"Source Node created successfully for source type: {source_type} and source: {source}" - json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name) + result ={'elapsed_api_time' : f'{elapsed_time:.2f}'} + return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name,data=result) except Exception as e: error_message = str(e) message = f" Unable to create source node for source type: {source_type} and 
source: {source}" @@ -198,7 +202,7 @@ async def extract_knowledge_graph_from_file( extract_graph_from_file_gcs, uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) else: return create_api_response('Failed',message='source_type is other than accepted source') - + extract_api_time = time.time() - start_time if result is not None: result['db_url'] = uri result['api_name'] = 'extract' @@ -206,8 +210,8 @@ async def extract_knowledge_graph_from_file( result['wiki_query'] = wiki_query result['source_type'] = source_type result['logging_time'] = formatted_time(datetime.now(timezone.utc)) + result['elapsed_api_time'] = f'{extract_api_time:.2f}' logger.log_struct(result, "INFO") - extract_api_time = time.time() - start_time logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) except Exception as e: @@ -237,13 +241,16 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None Calls 'get_source_list_from_graph' which returns list of sources which already exist in databse """ try: + start = time.time() decoded_password = decode_password(password) if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) - json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response("Success",data=result) + return create_api_response("Success",data=result, message=f"Total elapsed API time {elapsed_time:.2f}") except 
Exception as e: job_status = "Failed" message="Unable to fetch source list" @@ -307,7 +314,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} + json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{total_call_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) @@ -323,10 +330,13 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), @app.post("/chunk_entities") async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()): try: + start = time.time() result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) - json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response('Success',data=result) + return create_api_response('Success',data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: job_status = "Failed" message="Unable to extract entities from chunk ids" @@ -345,7 +355,8 @@ async def graph_query( document_names: str = Form(None), ): try: - print(document_names) + # print(document_names) + start = time.time() result = await asyncio.to_thread( 
get_graph_results, uri=uri, @@ -354,9 +365,11 @@ async def graph_query( database=database, document_names=document_names ) - json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response('Success', data=result) + return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: job_status = "Failed" message = "Unable to get graph query response" @@ -385,10 +398,14 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F @app.post("/connect") async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) - json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") + result['elapsed_api_time'] = f'{elapsed_time:.2f}' return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -402,10 +419,14 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber originalname=Form(None), model=Form(None), uri=Form(), userName=Form(), password=Form(), database=Form()): try: + start = time.time() graph = 
create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) - json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") + result['elapsed_api_time'] = f'{elapsed_time:.2f}' if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: @@ -423,12 +444,15 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber @app.post("/schema") async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), database=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(get_labels_and_relationtypes, graph) + end = time.time() + elapsed_time = end - start logging.info(f'Schema result from DB: {result}') - json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response('Success', data=result) + return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: message="Unable to get the labels and relationtypes from neo4j database" error_message = str(e) @@ -490,12 +514,15 @@ async def delete_document_and_entities(uri=Form(), source_types=Form(), deleteEntities=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, 
userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0 message = f"Deleted {files_list_size} documents with entities from database" - json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',message=message) except Exception as e: @@ -574,9 +601,14 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche @app.post("/get_unconnected_nodes_list") async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'get_unconnected_nodes_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=nodes_list,message=total_nodes) except Exception as e: job_status = "Failed" @@ -590,9 +622,14 @@ async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form( @app.post("/delete_unconnected_nodes") async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form()): try: + start = time.time() 
graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list) + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'delete_unconnected_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result,message="Unconnected entities delete successfully") except Exception as e: job_status = "Failed" @@ -606,9 +643,14 @@ async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), data @app.post("/get_duplicate_nodes") async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'get_duplicate_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=nodes_list, message=total_nodes) except Exception as e: job_status = "Failed" @@ -622,9 +664,14 @@ async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), data @app.post("/merge_duplicate_nodes") async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),duplicate_nodes_list=Form()): try: + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list) + end = time.time() + elapsed_time = end - start + json_obj = 
{'api_name':'merge_duplicate_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result,message="Duplicate entities merged successfully") except Exception as e: job_status = "Failed" diff --git a/backend/src/main.py b/backend/src/main.py index ce8c9812a..d48947811 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -309,6 +309,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages elapsed_get_chunkId_chunkDoc_list = end_get_chunkId_chunkDoc_list - start_get_chunkId_chunkDoc_list logging.info(f'Time taken to create list chunkids with chunk document: {elapsed_get_chunkId_chunkDoc_list:.2f} seconds') uri_latency["create_list_chunk_and_document"] = f'{elapsed_get_chunkId_chunkDoc_list:.2f}' + uri_latency["total_chunks"] = total_chunks start_status_document_node = time.time() result = graphDb_data_Access.get_current_status_document_node(file_name) @@ -369,8 +370,8 @@ def processing_source(uri, userName, password, database, model, file_name, pages processing_chunks_end_time = time.time() processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time logging.info(f"Time taken {update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_elapsed_end_time:.2f} seconds for file name {file_name}") - uri_latency[f'processed_combine_chunk_{i}'] = f'{processing_chunks_elapsed_end_time:.2f}' - uri_latency[f'processed_chunk_detail_{i}'] = latency_processed_chunk + uri_latency[f'processed_combine_chunk_{i}-{select_chunks_upto}'] = f'{processing_chunks_elapsed_end_time:.2f}' + uri_latency[f'processed_chunk_detail_{i}-{select_chunks_upto}'] = latency_processed_chunk end_time = datetime.now() processed_time = end_time - start_time @@ -418,6 +419,7 @@ def processing_source(uri, userName, password, database, model, 
file_name, pages processing_source_func = time.time() - processing_source_start_time logging.info(f"Time taken to processing source function completed in {processing_source_func:.2f} seconds for file name {file_name}") uri_latency["Processed_source"] = f'{processing_source_func:.2f}' + uri_latency["Per_entity_latency"] = f'{int(processing_source_func)/node_count:.2f}/s' uri_latency["fileName"] = file_name uri_latency["nodeCount"] = node_count uri_latency["relationshipCount"] = rel_count From 393c53c53f9a88e1e1a13b12bd1b5320bd5cb876 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 1 Oct 2024 05:46:24 +0000 Subject: [PATCH 148/292] Added the post processing Alert showcasing the ongoing post processing jobs --- frontend/src/components/Content.tsx | 7 ++-- .../PostProcessingToast.tsx | 18 ++++++++ .../SelectedJobList.tsx | 42 +++++++++++++++++++ .../Popups/RetryConfirmation/Index.tsx | 2 +- frontend/src/utils/toasts.ts | 3 +- 5 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast.tsx create mode 100644 frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 53bf47058..f74ca7020 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -34,6 +34,7 @@ import RetryConfirmationDialog from './Popups/RetryConfirmation/Index'; import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; import { useMessageContext } from '../context/UserMessages'; +import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = 
lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -159,12 +160,12 @@ const Content: React.FC = ({ } if (processedCount === 1 && queue.isEmpty()) { (async () => { - showNormalToast('Some Q&A functionality will only be available afterwards.'); + showNormalToast(); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); showSuccessToast('All Q&A functionality is available now.'); })(); } - }, [processedCount, userCredentials, queue, isReadOnlyUser]); + }, [processedCount, userCredentials, queue, isReadOnlyUser, isGdsActive]); useEffect(() => { if (afterFirstRender) { @@ -393,7 +394,7 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { - showNormalToast('Some Q&A functionality will only be available afterwards.'); + showNormalToast(); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); showSuccessToast('All Q&A functionality is available now.'); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast.tsx new file mode 100644 index 000000000..efb9ca8a2 --- /dev/null +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast.tsx @@ -0,0 +1,18 @@ +import { Flex, Typography } from '@neo4j-ndl/react'; +import SelectedJobList from './SelectedJobList'; + +export default function PostProcessingToast({ + postProcessingTasks, + isGdsActive, +}: { + postProcessingTasks: string[]; + isGdsActive: boolean; +}) { + return ( + + Some Q&A functionality will only be available afterwards + Ongoing Post Processing Jobs + + + ); +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx new 
file mode 100644 index 000000000..1cb3c7310 --- /dev/null +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -0,0 +1,42 @@ +import { Checkbox, Flex, Typography } from '@neo4j-ndl/react'; +import { capitalize } from '../../../../utils/Utils'; +import { useMemo } from 'react'; + +export default function SelectedJobList({ + postProcessingTasks, + isGdsActive, +}: { + postProcessingTasks: string[]; + isGdsActive: boolean; +}) { + const ongoingPostProcessingTasks = useMemo( + () => + (isGdsActive + ? postProcessingTasks.includes('enable_communities') + ? postProcessingTasks + : postProcessingTasks.filter((s) => s != 'enable_communities') + : postProcessingTasks.filter((s) => s != 'enable_communities')), + [isGdsActive, postProcessingTasks] + ); + return ( + + {ongoingPostProcessingTasks.map((task) => { + return ( + + {task + .split('_') + .map((s) => capitalize(s)) + .join(' ')} + + } + checked={true} + disabled={true} + aria-label='Selected-postprocessing-jobs' + /> + ); + })} + + ); +} diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index a510352aa..6ebb02e33 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -83,4 +83,4 @@ function RetryConfirmationDialog({ ); } -export default memo(RetryConfirmationDialog); \ No newline at end of file +export default memo(RetryConfirmationDialog); diff --git a/frontend/src/utils/toasts.ts b/frontend/src/utils/toasts.ts index 832c33b9a..ebe6ae709 100644 --- a/frontend/src/utils/toasts.ts +++ b/frontend/src/utils/toasts.ts @@ -1,4 +1,5 @@ import { toast } from '@neo4j-ndl/react'; +import { ReactNode } from 'react'; export const showErrorToast = (message: string, shouldAutoClose: boolean = true) => { return toast.danger(message, { @@ -14,7 +15,7 @@ export const showSuccessToast = (message: string) => { }); }; 
-export const showNormalToast = (message: string) => { +export const showNormalToast = (message: string | ReactNode) => { return toast.neutral(message, { isCloseable: true, shouldAutoClose: true, From 0dec005465baee7e165f9e25a6d530a38d7280e7 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:54:23 +0000 Subject: [PATCH 149/292] fix: readonly user retry option disable --- frontend/src/components/FileTable.tsx | 33 +++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 8855a746e..4b6d4bb7e 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -146,22 +146,21 @@ const FileTable = forwardRef((props, ref) => { > {info.getValue()} - {(info.getValue() === 'Completed' || - info.getValue() === 'Failed' || - (info.getValue() === 'Cancelled' && !isReadOnlyUser)) && ( - - onRetry(info?.row?.id as string)} - > - - - - )} + {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && + !isReadOnlyUser && ( + + onRetry(info?.row?.id as string)} + > + + + + )}
    ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { @@ -511,7 +510,7 @@ const FileTable = forwardRef((props, ref) => { footer: (info) => info.column.id, }), ], - [filesData.length, statusFilter, filetypeFilter, llmtypeFilter, fileSourceFilter] + [filesData.length, statusFilter, filetypeFilter, llmtypeFilter, fileSourceFilter, isReadOnlyUser] ); const table = useReactTable({ From ac3d88a9d18d6e7e67b483a77392004b9f359297 Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:30:01 +0000 Subject: [PATCH 150/292] update script to get details of extarcted doc --- backend/test_integrationqa.py | 75 ++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 832546995..0281c91f8 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -7,21 +7,22 @@ from dotenv import load_dotenv from score import * from src.main import * -from src.QA_integration_new import QA_RAG +from src.QA_integration import QA_RAG from langserve import add_routes # Load environment variables if needed load_dotenv() -# Constants -URI = '' -USERNAME = '' -PASSWORD = '' -DATABASE = 'neo4j' +import os +URI = os.getenv('NEO4J_URI') +USERNAME = os.getenv('NEO4J_USERNAME') +PASSWORD = os.getenv('NEO4J_PASSWORD') +DATABASE = os.getenv('NEO4J_DATABASE') + CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") # Initialize database connection -graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) +graph = create_graph_database_connection(URI,USERNAME,PASSWORD,DATABASE) def create_source_node_local(graph, model, file_name): """Creates a source node for a local file.""" @@ -36,6 +37,17 @@ def create_source_node_local(graph, model, file_name): 
graphDB_data_Access.create_source_node(source_node) return source_node +def delete_extracted_files(file_path): + """Delete the extracted files once extraction process is completed""" + try: + if os.path.exists(file_path): + os.remove(file_path) + logging.info(f"Deleted file:{file_path}") + else: + logging.warning(f"File not found for deletion: {file_path}") + except Exception as e: + logging.error(f"Failed to delete file {file_path}. Error: {e}") + def test_graph_from_file_local(model_name): """Test graph creation from a local file.""" file_name = 'About Amazon.pdf' @@ -51,13 +63,16 @@ def test_graph_from_file_local(model_name): logging.info("Local file processing complete") print(local_file_result) - try: - assert local_file_result['status'] == 'Completed' - assert local_file_result['nodeCount'] > 0 - assert local_file_result['relationshipCount'] > 0 - print("Success") - except AssertionError as e: - print("Fail: ", e) +# try: +# assert local_file_result['status'] == 'Completed' +# assert local_file_result['nodeCount'] > 0 +# assert local_file_result['relationshipCount'] > 0 +# print("Success") +# except AssertionError as e: +# print("Fail: ", e) + + # Delete the file after processing + delete_extracted_files(merged_file_path) return local_file_result @@ -182,25 +197,11 @@ def test_populate_graph_schema_from_text(model): print(result_schema) return result_schema -# def compare_graph_results(results): -# """ -# Compare graph results across different models. -# Add custom logic here to compare graph data, nodes, and relationships. 
-# """ -# # Placeholder logic for comparison -# print("Comparing results...") -# for i in range(len(results) - 1): -# result_a = results[i] -# result_b = results[i + 1] -# if result_a == result_b: -# print(f"Result {i} is identical to result {i+1}") -# else: -# print(f"Result {i} differs from result {i+1}") def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.0-pro','gemini-1.5-pro','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_v3p1_405b','bedrock_claude_3_5_sonnet'] + models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.5-pro','gemini 1.5 Flash','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','ollama','groq_llama3_70b','anthropic_claude_3_5_sonnet','bedrock_claude_3_5_sonnet','fireworks_llama_v3p2_90b'] for model_name in models: try: @@ -208,7 +209,7 @@ def run_tests(): final_list.append(test_graph_from_wikipedia(model_name)) final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_website(model_name)) - # final_list.append(test_graph_from_youtube_video(model_name)) + final_list.append(test_graph_from_youtube_video(model_name)) final_list.append(test_chatbot_qna(model_name)) final_list.append(test_chatbot_qna(model_name, mode='vector')) final_list.append(test_chatbot_qna(model_name, mode='graph+vector')) @@ -221,19 +222,19 @@ def run_tests(): # #Compare and log diffrences in graph results # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results # test_populate_graph_schema_from_text('openai-gpt-4o') - dis_elementid, dis_status = disconected_nodes() - lst_element_id = [dis_elementid] - delt = delete_disconected_nodes(lst_element_id) - dup = get_duplicate_nodes() +# dis_elementid, dis_status = disconected_nodes() +# lst_element_id = [dis_elementid] +# delt = delete_disconected_nodes(lst_element_id) +# dup = get_duplicate_nodes() 
print(final_list) # schma = test_populate_graph_schema_from_text(model) # Save final results to CSV df = pd.DataFrame(final_list) print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') - df['disconnected_nodes']=dis_status - df['get_duplicate_nodes']=dup - df['delete_disconected_nodes']=delt +# df['disconnected_nodes']=dis_status +# df['get_duplicate_nodes']=dup +# df['delete_disconected_nodes']=delt # df['test_populate_graph_schema_from_text'] = schma df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) From 83351acf6aeac43a7c7f7c6ef2c1e46f8fc78007 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:41:08 +0000 Subject: [PATCH 151/292] Issue fixed, Latency count per entity --- backend/score.py | 1 - backend/src/main.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/score.py b/backend/score.py index eba6c3a00..8887d6450 100644 --- a/backend/score.py +++ b/backend/score.py @@ -426,7 +426,6 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber elapsed_time = end - start json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - result['elapsed_api_time'] = f'{elapsed_time:.2f}' if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: diff --git a/backend/src/main.py b/backend/src/main.py index d48947811..731ff56e5 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -419,7 +419,10 @@ def processing_source(uri, userName, password, database, model, file_name, pages processing_source_func = time.time() - processing_source_start_time logging.info(f"Time taken to processing source function completed in {processing_source_func:.2f} seconds for file name {file_name}") 
uri_latency["Processed_source"] = f'{processing_source_func:.2f}' - uri_latency["Per_entity_latency"] = f'{int(processing_source_func)/node_count:.2f}/s' + if node_count == 0: + uri_latency["Per_entity_latency"] = 'N/A' + else: + uri_latency["Per_entity_latency"] = f'{int(processing_source_func)/node_count}/s' uri_latency["fileName"] = file_name uri_latency["nodeCount"] = node_count uri_latency["relationshipCount"] = rel_count From 178dacbd05184e4bd44529fc88a0501167efc76c Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:58:57 +0530 Subject: [PATCH 152/292] Multiple chat modes selection (#780) * added Multi modes selection * multimodes state mangement * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch statemangement * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code --- backend/score.py | 2 +- frontend/src/assets/ChatbotMessages.json | 39 +- .../src/components/ChatBot/ChatModeToggle.tsx | 31 +- .../components/ChatBot/ChatModesSwitch.tsx | 47 ++ frontend/src/components/ChatBot/Chatbot.tsx | 609 ++++++++++-------- .../components/ChatBot/CommonChatActions.tsx | 56 ++ frontend/src/components/Layout/SideNav.tsx | 9 +- frontend/src/context/UsersFiles.tsx | 6 +- frontend/src/hooks/useSpeech.tsx | 17 +- frontend/src/types.ts | 31 +- frontend/src/utils/Constants.ts | 2 +- 11 files changed, 486 insertions(+), 363 deletions(-) create mode 100644 frontend/src/components/ChatBot/ChatModesSwitch.tsx create mode 100644 frontend/src/components/ChatBot/CommonChatActions.tsx diff --git a/backend/score.py b/backend/score.py index 8887d6450..1162e7a80 100644 --- a/backend/score.py +++ b/backend/score.py @@ -323,7 +323,7 @@ async 
def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), message="Unable to get chat response" error_message = str(e) logging.exception(f'Exception in chat bot:{error_message}') - return create_api_response(job_status, message=message, error=error_message) + return create_api_response(job_status, message=message, error=error_message,data=mode) finally: gc.collect() diff --git a/frontend/src/assets/ChatbotMessages.json b/frontend/src/assets/ChatbotMessages.json index e245c61e4..c264938b1 100644 --- a/frontend/src/assets/ChatbotMessages.json +++ b/frontend/src/assets/ChatbotMessages.json @@ -1,40 +1,15 @@ { "listMessages": [ - { - "id": 1, - "message": "Hi, I need help with creating a Cypher query for Neo4j.", - "user": "user", - "datetime": "01/01/2024 00:00:00" - }, { "id": 2, - "message": " Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.", - "user": "chatbot", - "datetime": "01/01/2024 00:00:00" - }, - { - "id": 3, - "message": "I need to find all employees who work in the IT department.", - "user": "user", - "datetime": "01/01/2024 00:00:00" - }, - { - "id": 4, - "message": "Alright, you can use the following query: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN e.name`. This query matches nodes labeled 'Employee' related to the 'IT' department and returns their names.", - "user": "chatbot", - "datetime": "01/01/2024 00:00:00" - }, - { - "id": 5, - "message": "Thanks! And how do I get the total number of such employees?", - "user": "user", - "datetime": "01/01/2024 00:00:00" - }, - { - "id": 6, - "message": "To get the count, use: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN count(e)`. This counts all the distinct 'Employee' nodes related to the 'IT' department.", + "modes":{ + "graph+vector+fulltext":{ + "message": " Welcome to the Neo4j Knowledge Graph Chat. 
You can ask questions related to documents which have been completely processed." + } + }, "user": "chatbot", - "datetime": "01/01/2024 00:00:00" + "datetime": "01/01/2024 00:00:00", + "currentMode":"graph+vector+fulltext" } ] } \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index df44985be..b6ece4702 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -2,10 +2,11 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; -import { chatModeLables, chatModes } from '../../utils/Constants'; +import { chatModeLables, chatModes as AvailableModes } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; + export default function ChatModeToggle({ menuAnchor, closeHandler = () => {}, @@ -19,22 +20,22 @@ export default function ChatModeToggle({ anchorPortal?: boolean; disableBackdrop?: boolean; }) { - const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); + const { setchatModes, chatModes, postProcessingTasks, selectedRows } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); useEffect(() => { // If rows are selected, the mode is valid (either vector or graph+vector) if (selectedRows.length > 0) { - if (!(chatMode === chatModeLables.vector || chatMode === chatModeLables.graph_vector)) { - setchatMode(chatModeLables.graph_vector); + if (!(chatModes.includes(chatModeLables.vector) || chatModes.includes(chatModeLables.graph_vector))) { + setchatModes([chatModeLables.graph_vector]); } } - }, 
[selectedRows.length, chatMode, setchatMode]); + }, [selectedRows.length, chatModes.length]); const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed - ? chatModes - : chatModes?.filter( + ? AvailableModes + : AvailableModes?.filter( (m) => !m.mode.includes(chatModeLables.entity_vector) && !m.mode.includes(chatModeLables.global_vector) ); }, [isGdsActive, isCommunityAllowed]); @@ -45,9 +46,11 @@ export default function ChatModeToggle({ ); const handleModeChange = () => { if (isDisabled) { - setchatMode(chatModeLables.graph_vector); + setchatModes([chatModeLables.graph_vector]); + } else if (chatModes.includes(m.mode)) { + setchatModes((prev) => prev.filter((i) => i != m.mode)); } else { - setchatMode(m.mode); + setchatModes((prev) => [...prev, m.mode]); } closeHandler(); }; @@ -66,7 +69,7 @@ export default function ChatModeToggle({ disabledCondition: isDisabled, description: ( - {chatMode === m.mode && ( + {chatModes.includes(m.mode) && ( <> {chatModeLables.selected} @@ -80,13 +83,13 @@ export default function ChatModeToggle({ ), }; }); - }, [chatMode, memoizedChatModes, setchatMode, closeHandler, selectedRows]); + }, [chatModes.length, memoizedChatModes, closeHandler, selectedRows]); useEffect(() => { - if (!selectedRows.length && !chatMode) { - setchatMode(chatModeLables.graph_vector_fulltext); + if (!selectedRows.length && !chatModes.length) { + setchatModes([chatModeLables.graph_vector_fulltext]); } - }, [setchatMode, selectedRows.length, chatMode]); + }, [selectedRows.length, chatModes.length]); return ( void; + currentModeIndex: number; + modescount: number; + currentMode: string; + isFullScreen: boolean; +}) { + const chatmodetoshow = currentMode.includes('+') ? 
capitalizeWithPlus(currentMode) : capitalize(currentMode); + return ( + + switchToOtherMode(currentModeIndex - 1)} + > + + + + + {chatmodetoshow} + + + switchToOtherMode(currentModeIndex + 1)} + > + + + + ); +} diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 550d04c97..2d23ec452 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -1,26 +1,31 @@ -import React, { FC, lazy, Suspense, useEffect, useRef, useState } from 'react'; -import { Widget, Typography, Avatar, TextInput, IconButton, Modal, useCopyToClipboard } from '@neo4j-ndl/react'; +import React, { FC, lazy, Suspense, useCallback, useEffect, useRef, useState } from 'react'; import { - XMarkIconOutline, - ClipboardDocumentIconOutline, - SpeakerWaveIconOutline, - SpeakerXMarkIconOutline, -} from '@neo4j-ndl/react/icons'; + Widget, + Typography, + Avatar, + TextInput, + IconButton, + Modal, + useCopyToClipboard, + Flex, + Box, +} from '@neo4j-ndl/react'; +import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; import ChatBotAvatar from '../../assets/images/chatbot-ai.png'; -import { ChatbotProps, CustomFile, UserCredentials, nodeDetailsProps } from '../../types'; +import { ChatbotProps, CustomFile, Messages, ResponseMode, UserCredentials, nodeDetailsProps } from '../../types'; import { useCredentials } from '../../context/UserCredentials'; import { chatBotAPI } from '../../services/QnaAPI'; import { v4 as uuidv4 } from 'uuid'; import { useFileContext } from '../../context/UsersFiles'; import clsx from 'clsx'; import ReactMarkdown from 'react-markdown'; -import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; -import { buttonCaptions, chatModeLables, tooltips } from '../../utils/Constants'; +import { buttonCaptions, chatModeLables } from '../../utils/Constants'; import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; import 
FallBackDialog from '../UI/FallBackDialog'; -import { capitalizeWithPlus } from '../../utils/Utils'; -import { capitalize } from '@mui/material'; +import { getDateTime } from '../../utils/Utils'; +import ChatModesSwitch from './ChatModesSwitch'; +import CommonActions from './CommonChatActions'; const InfoModal = lazy(() => import('./ChatInfoModal')); const Chatbot: FC = (props) => { @@ -35,7 +40,7 @@ const Chatbot: FC = (props) => { const [inputMessage, setInputMessage] = useState(''); const [loading, setLoading] = useState(isLoading); const { userCredentials } = useCredentials(); - const { model, chatMode, selectedRows, filesData } = useFileContext(); + const { model, chatModes, selectedRows, filesData } = useFileContext(); const messagesEndRef = useRef(null); const [sessionId, setSessionId] = useState(sessionStorage.getItem('session_id') ?? ''); const [showInfoModal, setShowInfoModal] = useState(false); @@ -44,15 +49,14 @@ const Chatbot: FC = (props) => { const [responseTime, setResponseTime] = useState(0); const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); - const [copyMessageId, setCopyMessageId] = useState(null); const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); const [entitiesModal, setEntitiesModal] = useState([]); const [nodeDetailsModal, setNodeDetailsModal] = useState({}); - const [value, copy] = useCopyToClipboard(); - const { speak, cancel } = useSpeechSynthesis({ + const [_, copy] = useCopyToClipboard(); + const { speak, cancel, speaking } = useSpeechSynthesis({ onEnd: () => { setListMessages((msgs) => msgs.map((msg) => ({ ...msg, speaking: false }))); }, @@ -65,6 +69,7 @@ const Chatbot: FC = (props) => { const handleInputChange = (e: React.ChangeEvent) => { setInputMessage(e.target.value); }; + useEffect(() => { if (!sessionStorage.getItem('session_id')) { 
const id = uuidv4(); @@ -72,160 +77,178 @@ const Chatbot: FC = (props) => { sessionStorage.setItem('session_id', id); } }, []); - const simulateTypingEffect = ( - response: { - reply: string; - sources?: string[]; - model?: string; - total_tokens?: number; - response_time?: number; - speaking?: boolean; - copying?: boolean; - mode?: string; - cypher_query?: string; - graphonly_entities?: []; - error?: string; - entitiysearchonly_entities?: string[]; - nodeDetails?: nodeDetailsProps; - }, - index = 0 - ) => { - if (index < response.reply.length) { - const nextIndex = index + 1; - const currentTypedText = response.reply.substring(0, nextIndex); - if (index === 0) { - const date = new Date(); - const datetime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; - if (response.reply.length <= 1) { - setListMessages((msgs) => [ - ...msgs, - { - id: Date.now(), - user: 'chatbot', - message: currentTypedText, - datetime: datetime, - isTyping: false, - isLoading: true, - sources: response?.sources, - model: response?.model, - total_tokens: response.total_tokens, - response_time: response?.response_time, - speaking: false, - copying: false, - mode: response?.mode, - cypher_query: response?.cypher_query, - graphonly_entities: response?.graphonly_entities, - error: response.error, - entitiysearchonly_entities: response.entitiysearchonly_entities, - nodeDetails: response?.nodeDetails, - }, - ]); - } else { - setListMessages((msgs) => { - const lastmsg = { ...msgs[msgs.length - 1] }; - lastmsg.id = Date.now(); - lastmsg.user = 'chatbot'; - lastmsg.message = currentTypedText; - lastmsg.datetime = datetime; - lastmsg.isTyping = true; - lastmsg.isLoading = false; - lastmsg.sources = response?.sources; - lastmsg.model = response?.model; - lastmsg.total_tokens = response?.total_tokens; - lastmsg.response_time = response?.response_time; - lastmsg.speaking = false; - lastmsg.copying = false; - lastmsg.mode = response?.mode; - lastmsg.cypher_query = response.cypher_query; - 
lastmsg.graphonly_entities = response.graphonly_entities; - lastmsg.error = response.error; - lastmsg.entities = response.entitiysearchonly_entities; - lastmsg.nodeDetails = response?.nodeDetails; - return msgs.map((msg, index) => { - if (index === msgs.length - 1) { - return lastmsg; + + const simulateTypingEffect = (messageId: number, response: ResponseMode, mode: string, message: string) => { + let index = 0; + let lastTimestamp: number | null = null; + const TYPING_INTERVAL = 20; + const animate = (timestamp: number) => { + if (lastTimestamp === null) { + lastTimestamp = timestamp; + } + const elapsed = timestamp - lastTimestamp; + if (elapsed >= TYPING_INTERVAL) { + if (index < message.length) { + const nextIndex = index + 1; + const currentTypedText = message.substring(0, nextIndex); + setListMessages((msgs) => + msgs.map((msg) => { + if (msg.id === messageId) { + return { + ...msg, + modes: { + ...msg.modes, + [mode]: { + ...response, + message: currentTypedText, + }, + }, + isTyping: true, + speaking: false, + copying: false, + }; } return msg; - }); - }); + }) + ); + index = nextIndex; + lastTimestamp = timestamp; + } else { + setListMessages((msgs) => msgs.map((msg) => (msg.id === messageId ? { ...msg, isTyping: false } : msg))); + return; } - } else { - setListMessages((msgs) => msgs.map((msg) => (msg.isTyping ? { ...msg, message: currentTypedText } : msg))); } - setTimeout(() => simulateTypingEffect(response, nextIndex), 20); - } else { - setListMessages((msgs) => msgs.map((msg) => (msg.isTyping ? 
{ ...msg, isTyping: false } : msg))); - } + requestAnimationFrame(animate); + }; + requestAnimationFrame(animate); }; - let date = new Date(); const handleSubmit = async (e: { preventDefault: () => void }) => { e.preventDefault(); if (!inputMessage.trim()) { return; } - let chatbotReply; - let chatSources; - let chatModel; - let chatnodedetails; - let chatTimeTaken; - let chatTokensUsed; - let chatingMode; - let cypher_query; - let graphonly_entities; - let error; - let entitiysearchonly_entities; - let chatEntities; - const datetime = `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`; - const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime, mode: chatMode }; + const datetime = getDateTime(); + const userMessage: Messages = { + id: Date.now(), + user: 'user', + datetime: datetime, + currentMode: chatModes[0], + modes: {}, + }; + userMessage.modes[chatModes[0]] = { message: inputMessage }; setListMessages([...listMessages, userMessage]); + const chatbotMessageId = Date.now() + 1; + const chatbotMessage: Messages = { + id: chatbotMessageId, + user: 'chatbot', + datetime: new Date().toLocaleString(), + isTyping: true, + isLoading: true, + modes: {}, + currentMode: chatModes[0], + }; + setListMessages((prev) => [...prev, chatbotMessage]); try { + const apiCalls = chatModes.map((mode) => + chatBotAPI( + userCredentials as UserCredentials, + inputMessage, + sessionId, + model, + mode, + selectedFileNames?.map((f) => f.name) + ) + ); setInputMessage(''); - simulateTypingEffect({ reply: ' ' }); - const chatbotAPI = await chatBotAPI( - userCredentials as UserCredentials, - inputMessage, - sessionId, - model, - chatMode, - selectedFileNames?.map((f) => f.name) + const results = await Promise.allSettled(apiCalls); + results.forEach((result, index) => { + const mode = chatModes[index]; + if (result.status === 'fulfilled') { + // @ts-ignore + if (result.value.response.data.status === 'Success') { + const response = 
result.value.response.data.data; + const responseMode: ResponseMode = { + message: response.message, + sources: response.info.sources, + model: response.info.model, + total_tokens: response.info.total_tokens, + response_time: response.info.response_time, + cypher_query: response.info.cypher_query, + graphonly_entities: response.info.context, + entities: response.info.entities, + nodeDetails: response.info.nodedetails, + error: response.info.error, + }; + if (index === 0) { + simulateTypingEffect(chatbotMessageId, responseMode, mode, responseMode.message); + } else { + setListMessages((prev) => + prev.map((msg) => + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + ) + ); + } + } else { + const response = result.value.response.data; + const responseMode: ResponseMode = { + message: response.message, + error: response.error, + }; + if (index === 0) { + simulateTypingEffect(chatbotMessageId, responseMode, response.data, responseMode.message); + } else { + setListMessages((prev) => + prev.map((msg) => + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + ) + ); + } + } + } else { + console.error(`API call failed for mode ${mode}:`, result.reason); + setListMessages((prev) => + prev.map((msg) => + (msg.id === chatbotMessageId + ? { + ...msg, + modes: { + ...msg.modes, + [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, + }, + } + : msg) + ) + ); + } + }); + setListMessages((prev) => + prev.map((msg) => (msg.id === chatbotMessageId ? 
{ ...msg, isLoading: false, isTyping: false } : msg)) ); - const chatresponse = chatbotAPI?.response; - chatbotReply = chatresponse?.data?.data?.message; - chatSources = chatresponse?.data?.data?.info.sources; - chatModel = chatresponse?.data?.data?.info.model; - chatnodedetails = chatresponse?.data?.data?.info.nodedetails; - chatTokensUsed = chatresponse?.data?.data?.info.total_tokens; - chatTimeTaken = chatresponse?.data?.data?.info.response_time; - chatingMode = chatresponse?.data?.data?.info?.mode; - cypher_query = chatresponse?.data?.data?.info?.cypher_query ?? ''; - graphonly_entities = chatresponse?.data.data.info.context ?? []; - entitiysearchonly_entities = chatresponse?.data.data.info.entities; - error = chatresponse.data.data.info.error ?? ''; - chatEntities = chatresponse.data.data.info.entities; - const finalbotReply = { - reply: chatbotReply, - sources: chatSources, - model: chatModel, - total_tokens: chatTokensUsed, - response_time: chatTimeTaken, - speaking: false, - copying: false, - mode: chatingMode, - cypher_query, - graphonly_entities, - error, - entitiysearchonly_entities, - chatEntities, - nodeDetails: chatnodedetails, - }; - simulateTypingEffect(finalbotReply); } catch (error) { - chatbotReply = "Oops! It seems we couldn't retrieve the answer. Please try again later"; - setInputMessage(''); - simulateTypingEffect({ reply: chatbotReply }); + console.error('Error in handling chat:', error); + if (error instanceof Error) { + setListMessages((prev) => + prev.map((msg) => + (msg.id === chatbotMessageId + ? 
{ + ...msg, + isLoading: false, + isTyping: false, + modes: { + [chatModes[0]]: { + message: 'An error occurred while processing your request.', + error: error.message, + }, + }, + } + : msg) + ) + ); + } } }; + const scrollToBottom = () => { messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); }; @@ -254,9 +277,7 @@ const Chatbot: FC = (props) => { return msg; }) ); - setCopyMessageId(id); setTimeout(() => { - setCopyMessageId(null); setListMessages((msgs) => msgs.map((msg) => { if (msg.id === id) { @@ -273,152 +294,172 @@ const Chatbot: FC = (props) => { setListMessages((msgs) => msgs.map((msg) => (msg.id === id ? { ...msg, speaking: false } : msg))); }; - const handleSpeak = (chatMessage: any, id: number) => { - speak({ text: chatMessage }); + const handleSpeak = (chatMessage: string, id: number) => { + speak({ text: chatMessage }, typeof window !== 'undefined' && window.speechSynthesis != undefined); setListMessages((msgs) => { const messageWithSpeaking = msgs.find((msg) => msg.speaking); return msgs.map((msg) => (msg.id === id && !messageWithSpeaking ? { ...msg, speaking: true } : msg)); }); }; + const handleSwitchMode = (messageId: number, newMode: string) => { + const activespeechId = listMessages.find((msg) => msg.speaking)?.id; + if (speaking && messageId === activespeechId) { + cancel(); + setListMessages((prev) => + prev.map((msg) => (msg.id === messageId ? { ...msg, currentMode: newMode, speaking: false } : msg)) + ); + } else { + setListMessages((prev) => prev.map((msg) => (msg.id === messageId ? { ...msg, currentMode: newMode } : msg))); + } + }; + + const detailsHandler = useCallback((chat: Messages) => { + const currentMode = chat.modes[chat.currentMode]; + setModelModal(currentMode.model ?? ''); + setSourcesModal(currentMode.sources ?? []); + setResponseTime(currentMode.response_time ?? 0); + setTokensUsed(currentMode.total_tokens ?? 0); + setcypherQuery(currentMode.cypher_query ?? 
''); + setShowInfoModal(true); + setChatsMode(chat.currentMode ?? ''); + setgraphEntitites(currentMode.graphonly_entities ?? []); + setEntitiesModal(currentMode.entities ?? []); + setmessageError(currentMode.error ?? ''); + setNodeDetailsModal(currentMode.nodeDetails ?? {}); + }, []); + + const speechHandler = useCallback((chat: Messages) => { + if (chat.speaking) { + handleCancel(chat.id); + } else { + handleSpeak(chat.modes[chat.currentMode]?.message, chat.id); + } + }, []); return (
    - {listMessages.map((chat, index) => ( -
    -
    - {chat.user === 'chatbot' ? ( - - ) : ( - - )} -
    - - Chat Mode: {chat.mode.includes('+') ? capitalizeWithPlus(chat.mode) : capitalize(chat.mode)} - - ) : ( - '' - ) - } + {listMessages.map((chat, index) => { + const messagechatModes = Object.keys(chat.modes); + return ( +
    -
    + {chat.user === 'chatbot' ? ( + + ) : ( + + )} +
    + - {chat.message} -
    -
    -
    - - {chat.datetime} - +
    + {chat.modes[chat.currentMode]?.message || ''}
    - {chat.user === 'chatbot' && chat.id !== 2 && !chat.isLoading && !chat.isTyping && ( -
    - { - setModelModal(chat.model ?? ''); - setSourcesModal(chat.sources ?? []); - setResponseTime(chat.response_time ?? 0); - setTokensUsed(chat.total_tokens ?? 0); - setcypherQuery(chat.cypher_query ?? ''); - setShowInfoModal(true); - setChatsMode(chat.mode ?? ''); - setgraphEntitites(chat.graphonly_entities ?? []); - setEntitiesModal(chat.entities ?? []); - setmessageError(chat.error ?? ''); - setNodeDetailsModal(chat.nodeDetails ?? {}); - }} - > - {' '} - {buttonCaptions.details} - - handleCopy(chat.message, chat.id)} - disabled={chat.isTyping || chat.isLoading} - > - - - {copyMessageId === chat.id && ( - <> - Copied! - {value} - - )} - { - if (chat.speaking) { - handleCancel(chat.id); - } else { - handleSpeak(chat.message, chat.id); - } - }} - text={chat.speaking ? tooltips.stopSpeaking : tooltips.textTospeech} - disabled={listMessages.some((msg) => msg.speaking && msg.id !== chat.id)} - label={chat.speaking ? 'stop speaking' : 'text to speech'} - > - {chat.speaking ? ( - - ) : ( - - )} - +
    +
    + + {chat.datetime} +
    - )} -
    - -
    - ))} + {chat.user === 'chatbot' && + chat.id !== 2 && + !chat.isLoading && + !chat.isTyping && + (!isFullScreen ? ( + 1 ? 'space-between' : 'unset'} + alignItems='center' + > + + {messagechatModes.length > 1 && ( + { + const modes = Object.keys(chat.modes); + const modeswtich = modes[index]; + handleSwitchMode(chat.id, modeswtich); + }} + isFullScreen={false} + currentModeIndex={messagechatModes.indexOf(chat.currentMode)} + modescount={messagechatModes.length} + /> + )} + + ) : ( + + + + + + {messagechatModes.length > 1 && ( + { + const modes = Object.keys(chat.modes); + const modeswtich = modes[index]; + handleSwitchMode(chat.id, modeswtich); + }} + isFullScreen={isFullScreen} + currentModeIndex={messagechatModes.indexOf(chat.currentMode)} + modescount={messagechatModes.length} + /> + )} + + + ))} +
    + +
    + ); + })}
    diff --git a/frontend/src/components/ChatBot/CommonChatActions.tsx b/frontend/src/components/ChatBot/CommonChatActions.tsx new file mode 100644 index 000000000..07f04bc8f --- /dev/null +++ b/frontend/src/components/ChatBot/CommonChatActions.tsx @@ -0,0 +1,56 @@ +import { ClipboardDocumentIconOutline, SpeakerWaveIconOutline, SpeakerXMarkIconOutline } from '@neo4j-ndl/react/icons'; +import { Messages } from '../../types'; +import ButtonWithToolTip from '../UI/ButtonWithToolTip'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; +import { buttonCaptions, tooltips } from '../../utils/Constants'; + +export default function CommonActions({ + chat, + detailsHandler, + speechHandler, + copyHandler, + listMessages, +}: { + chat: Messages; + detailsHandler: (chat: Messages) => void; + speechHandler: (chat: Messages) => void; + copyHandler: (message: string, id: number) => void; + listMessages: Messages[]; +}) { + return ( + <> + detailsHandler(chat)} + > + {buttonCaptions.details} + + copyHandler(chat.modes[chat.currentMode]?.message, chat.id)} + disabled={chat.isTyping || chat.isLoading} + > + + + speechHandler(chat)} + text={chat.speaking ? tooltips.stopSpeaking : tooltips.textTospeech} + disabled={listMessages.some((msg) => msg.speaking && msg.id !== chat.id)} + label={chat.speaking ? 'stop speaking' : 'text to speech'} + > + {chat.speaking ? : } + + + ); +} diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index e07979405..2472d6e76 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -54,9 +54,14 @@ const SideNav: React.FC = ({ { datetime: `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, id: 2, - message: - ' Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.', + modes: { + 'graph+vector+fulltext': { + message: + ' Welcome to the Neo4j Knowledge Graph Chat. 
You can ask questions related to documents which have been completely processed.', + }, + }, user: 'chatbot', + currentMode: 'graph+vector+fulltext', }, ]); } diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 04a435f07..14f5a0245 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -29,7 +29,7 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); - const [chatMode, setchatMode] = useState(chatModeLables.graph_vector_fulltext); + const [chatModes, setchatModes] = useState([chatModeLables.graph_vector_fulltext]); const [isSchema, setIsSchema] = useState(false); const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({ triggeredFrom: '', @@ -78,8 +78,8 @@ const FileContextProvider: FC = ({ children }) => { setSelectedRows, selectedSchemas, setSelectedSchemas, - chatMode, - setchatMode, + chatModes, + setchatModes, isSchema, setIsSchema, setShowTextFromSchemaDialog, diff --git a/frontend/src/hooks/useSpeech.tsx b/frontend/src/hooks/useSpeech.tsx index d8173f6dd..54c7967f4 100644 --- a/frontend/src/hooks/useSpeech.tsx +++ b/frontend/src/hooks/useSpeech.tsx @@ -1,22 +1,17 @@ -import { useEffect, useState } from 'react'; +import { useState } from 'react'; import { SpeechSynthesisProps, SpeechArgs } from '../types'; const useSpeechSynthesis = (props: SpeechSynthesisProps = {}) => { const { onEnd = () => {} } = props; const [speaking, setSpeaking] = useState(false); - const [supported, setSupported] = useState(false); const handleEnd = () => { setSpeaking(false); onEnd(); }; - useEffect(() => { - if (typeof window !== 'undefined' && window.speechSynthesis) { - setSupported(true); - } - }, []); - const speak = (args: SpeechArgs = {}) => { + + const speak = (args: SpeechArgs = {}, isSupported: boolean) => { 
const { text = '', rate = 1, pitch = 1, volume = 1 } = args; - if (!supported) { + if (!isSupported) { return; } setSpeaking(true); @@ -29,14 +24,10 @@ const useSpeechSynthesis = (props: SpeechSynthesisProps = {}) => { window.speechSynthesis.speak(utterance); }; const cancel = () => { - if (!supported) { - return; - } setSpeaking(false); window.speechSynthesis.cancel(); }; return { - supported, speak, speaking, cancel, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 5dc09f418..eb0d6c090 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -206,26 +206,31 @@ export interface ChunkDetail { id: string; score: number; } -export interface Messages { - id: number; +export type ResponseMode = { message: string; - user: string; - datetime: string; - isTyping?: boolean; sources?: string[]; model?: string; - isLoading?: boolean; + total_tokens?: number; response_time?: number; + cypher_query?: string; nodeDetails?: nodeDetailsProps; chunk_ids?: string[]; - total_tokens?: number; - speaking?: boolean; - copying?: boolean; - mode?: string; - cypher_query?: string; graphonly_entities?: []; error?: string; entities?: string[]; +}; +export interface Messages { + id: number; + user: string; + datetime: string; + isTyping?: boolean; + isLoading?: boolean; + speaking?: boolean; + copying?: boolean; + modes: { + [key: string]: ResponseMode; + }; + currentMode: string; } export type ChatbotProps = { @@ -723,8 +728,8 @@ export interface FileContextType { setSelectedRows: React.Dispatch>; selectedSchemas: readonly OptionType[]; setSelectedSchemas: Dispatch>; - chatMode: string; - setchatMode: Dispatch>; + chatModes: string[]; + setchatModes: Dispatch>; isSchema: boolean; setIsSchema: React.Dispatch>; showTextFromSchemaDialog: showTextFromSchemaDialogType; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index a9a41c0a2..f740531d0 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -311,5 +311,5 
@@ export const connectionLabels = { redStroke: 'red', }; export const getDefaultMessage = () => { - return [{ ...chatbotmessages.listMessages[1], datetime: getDateTime() }]; + return [{ ...chatbotmessages.listMessages[0], datetime: getDateTime() }]; }; From 1a33f0d30b260add3868359cf98c31d5714b5702 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 7 Oct 2024 05:51:36 +0000 Subject: [PATCH 153/292] Fix: ChatModes DeSelection on FIle Selection --- frontend/src/components/ChatBot/ChatModeToggle.tsx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index b6ece4702..127942c57 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -27,6 +27,17 @@ export default function ChatModeToggle({ useEffect(() => { // If rows are selected, the mode is valid (either vector or graph+vector) if (selectedRows.length > 0) { + if ( + chatModes.includes(chatModeLables.graph) || + chatModes.includes(chatModeLables.fulltext) || + chatModes.includes(chatModeLables.graph_vector_fulltext) + ) { + setchatModes((prev) => + prev.filter( + (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables.graph_vector_fulltext].includes(m) + ) + ); + } if (!(chatModes.includes(chatModeLables.vector) || chatModes.includes(chatModeLables.graph_vector))) { setchatModes([chatModeLables.graph_vector]); } From e5760553ecfffef3bd6fe0bd40f9e8d6f566f770 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 7 Oct 2024 07:15:19 +0000 Subject: [PATCH 154/292] Fix: Order of the chatmodes accordoing to selected chatmodes --- frontend/src/components/ChatBot/Chatbot.tsx | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx 
b/frontend/src/components/ChatBot/Chatbot.tsx index 2d23ec452..e075777db 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -114,7 +114,16 @@ const Chatbot: FC = (props) => { index = nextIndex; lastTimestamp = timestamp; } else { - setListMessages((msgs) => msgs.map((msg) => (msg.id === messageId ? { ...msg, isTyping: false } : msg))); + setListMessages((msgs) => { + const activeMessage = msgs.find((message) => message.id === messageId); + let sortedModes: Record; + if (activeMessage) { + sortedModes = Object.fromEntries( + chatModes.filter((m) => m in activeMessage.modes).map((key) => [key, activeMessage?.modes[key]]) + ); + } + return msgs.map((msg) => (msg.id === messageId ? { ...msg, isTyping: false, modes: sortedModes } : msg)); + }); return; } } @@ -164,6 +173,7 @@ const Chatbot: FC = (props) => { const results = await Promise.allSettled(apiCalls); results.forEach((result, index) => { const mode = chatModes[index]; + console.log({ mode }); if (result.status === 'fulfilled') { // @ts-ignore if (result.value.response.data.status === 'Success') { @@ -222,9 +232,10 @@ const Chatbot: FC = (props) => { ); } }); - setListMessages((prev) => - prev.map((msg) => (msg.id === chatbotMessageId ? { ...msg, isLoading: false, isTyping: false } : msg)) - ); + + setListMessages((prev) => { + return prev.map((msg) => (msg.id === chatbotMessageId ? 
{ ...msg, isLoading: false, isTyping: false } : msg)); + }); } catch (error) { console.error('Error in handling chat:', error); if (error instanceof Error) { From d1a56cab538d30bffc1482e10e11788dd257ca39 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:37:00 +0530 Subject: [PATCH 155/292] Community optimization (#790) * modified leidens parameters * updated disconnected nodes query * excluded communities from dedup * added index creation * modified de dup query * added delete query for communities --- backend/score.py | 4 +- backend/src/communities.py | 107 ++++++++++++++++++++++++++---- backend/src/graphDB_dataAccess.py | 60 ++++++++++++----- backend/src/post_processing.py | 76 ++++++++++++++++++--- backend/src/shared/constants.py | 8 +-- 5 files changed, 211 insertions(+), 44 deletions(-) diff --git a/backend/score.py b/backend/score.py index 1162e7a80..7bc62d186 100644 --- a/backend/score.py +++ b/backend/score.py @@ -14,7 +14,7 @@ from src.graphDB_dataAccess import graphDBdataAccess from src.graph_query import get_graph_results from src.chunkid_entities import get_entities_from_chunkids -from src.post_processing import create_fulltext_indexes, create_entity_embedding +from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import create_communities import json @@ -270,7 +270,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database logging.info(f'Updated KNN Graph') if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: - await asyncio.to_thread(create_fulltext_indexes, uri=uri, username=userName, password=password, database=database) + await asyncio.to_thread(create_vector_fulltext_indexes, uri=uri, username=userName, password=password, database=database) json_obj = {'api_name': 
'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Full Text index created') diff --git a/backend/src/communities.py b/backend/src/communities.py index 7086a4786..1b19c689b 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -12,6 +12,8 @@ NODE_PROJECTION = "!Chunk&!Document&!__Community__" NODE_PROJECTION_ENTITY = "__Entity__" MAX_WORKERS = 10 +MAX_COMMUNITY_LEVELS = 3 +MIN_COMMUNITY_SIZE = 1 CREATE_COMMUNITY_GRAPH_PROJECTION = """ @@ -143,7 +145,8 @@ ENTITY_VECTOR_INDEX_NAME = "entity_vector" ENTITY_VECTOR_EMBEDDING_DIMENSION = 384 -CREATE_ENTITY_VECTOR_INDEX = """ +DROP_ENTITY_VECTOR_INDEX_QUERY = f"DROP INDEX {ENTITY_VECTOR_INDEX_NAME} IF EXISTS;" +CREATE_ENTITY_VECTOR_INDEX_QUERY = """ CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (e:__Entity__) ON e.embedding OPTIONS {{ indexConfig: {{ @@ -151,7 +154,26 @@ `vector.similarity_function`: 'cosine' }} }} -""" +""" + +COMMUNITY_VECTOR_INDEX_NAME = "community_vector" +COMMUNITY_VECTOR_EMBEDDING_DIMENSION = 384 + +DROP_COMMUNITY_VECTOR_INDEX_QUERY = f"DROP INDEX {COMMUNITY_VECTOR_INDEX_NAME} IF EXISTS;" +CREATE_COMMUNITY_VECTOR_INDEX_QUERY = """ +CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (c:__Community__) ON c.embedding +OPTIONS {{ + indexConfig: {{ + `vector.dimensions`: {embedding_dimension}, + `vector.similarity_function`: 'cosine' + }} +}} +""" + +COMMUNITY_FULLTEXT_INDEX_NAME = "community_keyword" +COMMUNITY_FULLTEXT_INDEX_DROP_QUERY = f"DROP INDEX {COMMUNITY_FULLTEXT_INDEX_NAME} IF EXISTS;" +COMMUNITY_INDEX_FULL_TEXT_QUERY = f"CREATE FULLTEXT INDEX {COMMUNITY_FULLTEXT_INDEX_NAME} FOR (n:`__Community__`) ON EACH [n.summary]" + def get_gds_driver(uri, username, password, database): @@ -194,7 +216,9 @@ def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME graph_project, writeProperty=project_name, includeIntermediateCommunities=True, - 
relationshipWeightProperty="weight" + relationshipWeightProperty="weight", + maxLevels=MAX_COMMUNITY_LEVELS, + minCommunitySize=MIN_COMMUNITY_SIZE, ) logging.info("Communities written successfully.") return True @@ -328,18 +352,69 @@ def create_community_embeddings(gds): except Exception as e: logging.error(f"An error occurred during the community embedding process: {e}") -def create_entity_vector_index(gds, embedding_dimension=ENTITY_VECTOR_EMBEDDING_DIMENSION): - query = CREATE_ENTITY_VECTOR_INDEX.format( - index_name=ENTITY_VECTOR_INDEX_NAME, - embedding_dimension=embedding_dimension - ) + +def create_vector_index(gds, index_type,embedding_dimension=None): + drop_query = "" + query = "" + + if index_type == ENTITY_VECTOR_INDEX_NAME: + drop_query = DROP_ENTITY_VECTOR_INDEX_QUERY + query = CREATE_ENTITY_VECTOR_INDEX_QUERY.format( + index_name=ENTITY_VECTOR_INDEX_NAME, + embedding_dimension=embedding_dimension if embedding_dimension else ENTITY_VECTOR_EMBEDDING_DIMENSION + ) + elif index_type == COMMUNITY_VECTOR_INDEX_NAME: + drop_query = DROP_COMMUNITY_VECTOR_INDEX_QUERY + query = CREATE_COMMUNITY_VECTOR_INDEX_QUERY.format( + index_name=COMMUNITY_VECTOR_INDEX_NAME, + embedding_dimension=embedding_dimension if embedding_dimension else COMMUNITY_VECTOR_EMBEDDING_DIMENSION + ) + else: + logging.error(f"Invalid index type provided: {index_type}") + return + try: - logging.info(f"Running Cypher query to create entity vector index: {query}") + logging.info("Starting the process to create vector index.") + + logging.info(f"Executing drop query: {drop_query}") + gds.run_cypher(drop_query) + + logging.info(f"Executing create query: {query}") gds.run_cypher(query) - logging.info("Entity vector index created successfully.") + + logging.info(f"Vector index '{index_type}' created successfully.") + except Exception as e: - logging.error(f"Error occurred while creating entity vector index: {e}", exc_info=True) - + logging.error("An error occurred while creating the vector 
index.", exc_info=True) + logging.error(f"Error details: {str(e)}") + + +def create_fulltext_index(gds, index_type): + drop_query = "" + query = "" + + if index_type == COMMUNITY_FULLTEXT_INDEX_NAME: + drop_query = COMMUNITY_FULLTEXT_INDEX_DROP_QUERY + query = COMMUNITY_INDEX_FULL_TEXT_QUERY + else: + logging.error(f"Invalid index type provided: {index_type}") + return + + try: + logging.info("Starting the process to create full-text index.") + + logging.info(f"Executing drop query: {drop_query}") + gds.run_cypher(drop_query) + + logging.info(f"Executing create query: {query}") + gds.run_cypher(query) + + logging.info(f"Full-text index '{index_type}' created successfully.") + + except Exception as e: + logging.error("An error occurred while creating the full-text index.", exc_info=True) + logging.error(f"Error details: {str(e)}") + def create_community_properties(gds, model): commands = [ (CREATE_COMMUNITY_CONSTRAINT, "created community constraint to the graph."), @@ -360,9 +435,15 @@ def create_community_properties(gds, model): embedding_dimension = create_community_embeddings(gds) logging.info("Successfully created community embeddings.") - create_entity_vector_index(gds,embedding_dimension=embedding_dimension) + create_vector_index(gds=gds,index_type=ENTITY_VECTOR_INDEX_NAME,embedding_dimension=embedding_dimension) logging.info("Successfully created Entity Vector Index.") + create_vector_index(gds=gds,index_type=COMMUNITY_VECTOR_INDEX_NAME,embedding_dimension=embedding_dimension) + logging.info("Successfully created community Vector Index.") + + create_fulltext_index(gds=gds,index_type=COMMUNITY_FULLTEXT_INDEX_NAME) + logging.info("Successfully created community fulltext Index.") + except Exception as e: logging.error(f"Error during community properties creation: {e}") raise diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index c2e2f6424..e3b923f37 100644 --- a/backend/src/graphDB_dataAccess.py +++ 
b/backend/src/graphDB_dataAccess.py @@ -6,6 +6,7 @@ from src.document_sources.gcs_bucket import delete_file_from_gcs from src.shared.constants import BUCKET_UPLOAD from src.entities.source_node import sourceNode +from src.communities import MAX_COMMUNITY_LEVELS import json from dotenv import load_dotenv @@ -281,10 +282,23 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me match (c)-[:HAS_ENTITY]->(e) where not exists { (e)<-[:HAS_ENTITY]-()-[:PART_OF]->(d2) where not d2 in documents } detach delete e - """ + """ + query_to_delete_communities = """ + MATCH (c:`__Community__`) + WHERE NOT EXISTS { ()-[:IN_COMMUNITY]->(c) } AND c.level = 0 + DETACH DELETE c + + WITH * + UNWIND range(1, $max_level) AS level + MATCH (c:`__Community__`) + WHERE c.level = level AND NOT EXISTS { (c)<-[:PARENT_COMMUNITY]-(child) } + DETACH DELETE c + """ param = {"filename_list" : filename_list, "source_types_list": source_types_list} + community_param = {"max_level":MAX_COMMUNITY_LEVELS} if deleteEntities == "true": result = self.execute_query(query_to_delete_document_and_entities, param) + _ = self.execute_query(query_to_delete_communities,community_param) logging.info(f"Deleting {len(filename_list)} documents = '{filename_list}' from '{source_types_list}' from database") else : result = self.execute_query(query_to_delete_document, param) @@ -293,17 +307,29 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me def list_unconnected_nodes(self): query = """ - MATCH (e:!Chunk&!Document) - WHERE NOT exists { (e)--(:!Chunk&!Document) } - OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(e) - RETURN e {.*, embedding:null, elementId:elementId(e), labels:labels(e)} as e, - collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections - ORDER BY e.id ASC - LIMIT 100 - """ + MATCH (e:!Chunk&!Document&!`__Community__`) + WHERE NOT exists { (e)--(:!Chunk&!Document&!`__Community__`) } + OPTIONAL MATCH 
(doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(e) + RETURN + e { + .*, + embedding: null, + elementId: elementId(e), + labels: CASE + WHEN size(labels(e)) > 1 THEN + apoc.coll.removeAll(labels(e), ["__Entity__"]) + ELSE + ["Entity"] + END + } AS e, + collect(distinct doc.fileName) AS documents, + count(distinct c) AS chunkConnections + ORDER BY e.id ASC + LIMIT 100 + """ query_total_nodes = """ - MATCH (e:!Chunk&!Document) - WHERE NOT exists { (e)--(:!Chunk&!Document) } + MATCH (e:!Chunk&!Document&!`__Community__`) + WHERE NOT exists { (e)--(:!Chunk&!Document&!`__Community__`) } RETURN count(*) as total """ nodes_list = self.execute_query(query) @@ -323,9 +349,9 @@ def get_duplicate_nodes_list(self): score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE')) text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE')) query_duplicate_nodes = """ - MATCH (n:!Chunk&!Document) with n - WHERE n.embedding is not null and n.id is not null // and size(n.id) > 3 - WITH n ORDER BY count {{ (n)--() }} DESC, size(n.id) DESC // updated + MATCH (n:!Chunk&!Document&!`__Community__`) with n + WHERE n.embedding is not null and n.id is not null // and size(toString(n.id)) > 3 + WITH n ORDER BY count {{ (n)--() }} DESC, size(toString(n.id)) DESC // updated WITH collect(n) as nodes UNWIND nodes as n WITH n, [other in nodes @@ -335,9 +361,9 @@ def get_duplicate_nodes_list(self): AND ( // either contains each other as substrings or has a text edit distinct of less than 3 - (size(other.id) > 2 AND toLower(n.id) CONTAINS toLower(other.id)) OR - (size(n.id) > 2 AND toLower(other.id) CONTAINS toLower(n.id)) - OR (size(n.id)>5 AND apoc.text.distance(toLower(n.id), toLower(other.id)) < $duplicate_text_distance) + (size(toString(other.id)) > 2 AND toLower(n.id) CONTAINS toLower(other.id)) OR + (size(toString(n.id)) > 2 AND toLower(other.id) CONTAINS toLower(n.id)) + OR (size(toString(n.id))>5 AND apoc.text.distance(toLower(n.id), toLower(other.id)) < $duplicate_text_distance) OR 
vector.similarity.cosine(other.embedding, n.embedding) > $duplicate_score_value )] as similar diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 88d99385a..22af50b76 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -16,6 +16,55 @@ COMMUNITY_INDEX_DROP_QUERY = "DROP INDEX community_keyword IF EXISTS;" COMMUNITY_INDEX_FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX community_keyword FOR (n:`__Community__`) ON EACH [n.summary]" +CHUNK_VECTOR_INDEX_NAME = "vector" +CHUNK_VECTOR_EMBEDDING_DIMENSION = 384 + +DROP_CHUNK_VECTOR_INDEX_QUERY = f"DROP INDEX {CHUNK_VECTOR_INDEX_NAME} IF EXISTS;" +CREATE_CHUNK_VECTOR_INDEX_QUERY = """ +CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (c:Chunk) ON c.embedding +OPTIONS {{ + indexConfig: {{ + `vector.dimensions`: {embedding_dimension}, + `vector.similarity_function`: 'cosine' + }} +}} +""" + +def create_vector_index(driver, index_type, embedding_dimension=None): + drop_query = "" + query = "" + + if index_type == CHUNK_VECTOR_INDEX_NAME: + drop_query = DROP_CHUNK_VECTOR_INDEX_QUERY + query = CREATE_CHUNK_VECTOR_INDEX_QUERY.format( + index_name=CHUNK_VECTOR_INDEX_NAME, + embedding_dimension=embedding_dimension if embedding_dimension else CHUNK_VECTOR_EMBEDDING_DIMENSION + ) + else: + logging.error(f"Invalid index type provided: {index_type}") + return + + try: + logging.info("Starting the process to create vector index.") + with driver.session() as session: + try: + start_step = time.time() + session.run(drop_query) + logging.info(f"Dropped existing index (if any) in {time.time() - start_step:.2f} seconds.") + except Exception as e: + logging.error(f"Failed to drop index: {e}") + return + + try: + start_step = time.time() + session.run(query) + logging.info(f"Created vector index in {time.time() - start_step:.2f} seconds.") + except Exception as e: + logging.error(f"Failed to create vector index: {e}") + return + except Exception as e: + logging.error("An error occurred 
while creating the vector index.", exc_info=True) + logging.error(f"Error details: {str(e)}") def create_fulltext(driver,type): @@ -70,8 +119,8 @@ def create_fulltext(driver,type): logging.info(f"Process completed in {time.time() - start_time:.2f} seconds.") -def create_fulltext_indexes(uri, username, password, database): - types = ["entities", "hybrid", "community"] +def create_vector_fulltext_indexes(uri, username, password, database): + types = ["entities", "hybrid"] logging.info("Starting the process of creating full-text indexes.") try: @@ -79,7 +128,7 @@ def create_fulltext_indexes(uri, username, password, database): driver.verify_connectivity() logging.info("Database connectivity verified.") except Exception as e: - logging.error(f"An unexpected error occurred: {e}") + logging.error(f"Error connecting to the database: {e}") return for index_type in types: @@ -90,11 +139,22 @@ def create_fulltext_indexes(uri, username, password, database): except Exception as e: logging.error(f"Failed to create full-text index for type '{index_type}': {e}") - logging.info("Full-text indexes creation process completed.") - driver.close() - logging.info("Driver closed.") - - + try: + logging.info(f"Creating a vector index for type '{CHUNK_VECTOR_INDEX_NAME}'.") + create_vector_index(driver, CHUNK_VECTOR_INDEX_NAME,CHUNK_VECTOR_EMBEDDING_DIMENSION) + logging.info("Vector index for chunk created successfully.") + except Exception as e: + logging.error(f"Failed to create vector index for '{CHUNK_VECTOR_INDEX_NAME}': {e}") + + try: + driver.close() + logging.info("Driver closed successfully.") + except Exception as e: + logging.error(f"Error closing the driver: {e}") + + logging.info("Full-text and vector index creation process completed.") + + def create_entity_embedding(graph:Neo4jGraph): rows = fetch_entities_for_embedding(graph) for i in range(0, len(rows), 1000): diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 834459fd1..20bbffa3f 100644 
--- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -219,7 +219,7 @@ QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." ## CHAT QUERIES -VECTOR_SEARCH_TOP_K = 10 +VECTOR_SEARCH_TOP_K = 5 VECTOR_SEARCH_QUERY = """ WITH node AS chunk, score @@ -246,11 +246,11 @@ """ ### Vector graph search -VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 40 VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH = 0.3 VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH = 0.9 -VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE = 10 -VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE = 25 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE = 20 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE = 40 VECTOR_GRAPH_SEARCH_QUERY_PREFIX = """ WITH node as chunk, score From 7f255eee875d576ba370f29a32d29fc780e6910a Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:37:18 +0530 Subject: [PATCH 156/292] Async way to create entities from multiple chunks (#788) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * async way to get graph documents * indentation correction --- backend/score.py | 18 ++++++------------ backend/src/llm.py | 32 ++++++++++++++++---------------- backend/src/main.py | 44 ++++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 50 deletions(-) diff --git a/backend/score.py b/backend/score.py index 7bc62d186..2908c89ea 100644 --- a/backend/score.py +++ b/backend/score.py @@ -178,28 +178,22 @@ async def extract_knowledge_graph_from_file( if source_type == 'local file': merged_file_path = os.path.join(MERGED_DIR,file_name) logging.info(f'File path:{merged_file_path}') - result = await asyncio.to_thread( - extract_graph_from_file_local_file, uri, userName, password, database, model, 
merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 's3 bucket' and source_url: - result = await asyncio.to_thread( - extract_graph_from_file_s3, uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'web-url': - result = await asyncio.to_thread( - extract_graph_from_web_page, uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'youtube' and source_url: - result = await asyncio.to_thread( - extract_graph_from_file_youtube, uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'Wikipedia' and wiki_query: - result = await asyncio.to_thread( - extract_graph_from_file_Wikipedia, uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'gcs bucket' and gcs_bucket_name: - result = 
await asyncio.to_thread( - extract_graph_from_file_gcs, uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) + result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) else: return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time diff --git a/backend/src/llm.py b/backend/src/llm.py index c2335685f..c7c6b36e2 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -141,7 +141,7 @@ def get_combined_chunks(chunkId_chunkDoc_list): return combined_chunk_document_list -def get_graph_document_list( +async def get_graph_document_list( llm, combined_chunk_document_list, allowedNodes, allowedRelationship ): futures = [] @@ -165,23 +165,23 @@ def get_graph_document_list( ignore_tool_usage=True, #prompt = ChatPromptTemplate.from_messages(["system",PROMPT_TO_ALL_LLMs]) ) - with ThreadPoolExecutor(max_workers=10) as executor: - for chunk in combined_chunk_document_list: - chunk_doc = Document( - page_content=chunk.page_content.encode("utf-8"), metadata=chunk.metadata - ) - futures.append( - executor.submit(llm_transformer.convert_to_graph_documents, [chunk_doc]) - ) - - for i, future in enumerate(concurrent.futures.as_completed(futures)): - graph_document = future.result() - graph_document_list.append(graph_document[0]) - + # with ThreadPoolExecutor(max_workers=10) as executor: + # for chunk in combined_chunk_document_list: + # chunk_doc = Document( + # page_content=chunk.page_content.encode("utf-8"), metadata=chunk.metadata + # ) + # futures.append( + # executor.submit(llm_transformer.convert_to_graph_documents, [chunk_doc]) + # ) + + # for i, future in 
enumerate(concurrent.futures.as_completed(futures)): + # graph_document = future.result() + # graph_document_list.append(graph_document[0]) + graph_document_list = await llm_transformer.aconvert_to_graph_documents(combined_chunk_document_list) return graph_document_list -def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship): +async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship): llm, model_name = get_llm(model) combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) @@ -195,7 +195,7 @@ def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelati else: allowedRelationship = allowedRelationship.split(',') - graph_document_list = get_graph_document_list( + graph_document_list = await get_graph_document_list( llm, combined_chunk_document_list, allowedNodes, allowedRelationship ) return graph_document_list diff --git a/backend/src/main.py b/backend/src/main.py index 731ff56e5..8b95b2e88 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -208,7 +208,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url, 'language':obj_source_node.language, 'status':'Success'}) return lst_file_name,success_count,failed_count -def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition): logging.info(f'Process file name :{fileName}') if retry_condition is None: @@ -220,11 +220,11 @@ def extract_graph_from_file_local_file(uri, userName, password, database, model, file_name, pages, file_extension = 
get_documents_from_file_by_path(merged_file_path,fileName) if pages==None or len(pages)==0: raise Exception(f'File content is not available for file : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path) else: - return processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition) + return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition) -def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition): if retry_condition is None: if(aws_access_key_id==None or aws_secret_access_key==None): raise Exception('Please provide AWS access and secret keys') @@ -234,49 +234,49 @@ def extract_graph_from_file_s3(uri, userName, password, database, model, source_ if pages==None or len(pages)==0: raise Exception(f'File content is not available for file : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + return await processing_source(uri, userName, 
password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): if retry_condition is None: file_name, pages = get_documents_from_web_page(source_url) if pages==None or len(pages)==0: raise Exception(f'Content is not available for given URL : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): if retry_condition is None: file_name, pages = get_documents_from_youtube(source_url) if pages==None or len(pages)==0: raise Exception(f'Youtube transcript is not available for file : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - return processing_source(uri, userName, password, 
database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition): if retry_condition is None: file_name, pages = get_documents_from_Wikipedia(wiki_query, language) if pages==None or len(pages)==0: raise Exception(f'Wikipedia page is not available for file : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - return processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition) + return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition): +async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition): if retry_condition is None: file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or 
len(pages)==0: raise Exception(f'File content is not available for file : {file_name}') - return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) + return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) else: - return processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) + return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) -def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None): +async def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, retry_condition=None): """ Extracts a Neo4jGraph from a PDF file based on the model. 
@@ -366,7 +366,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages break else: processing_chunks_start_time = time.time() - node_count,rel_count,latency_processed_chunk = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) + node_count,rel_count,latency_processed_chunk = await processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count) processing_chunks_end_time = time.time() processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time logging.info(f"Time taken {update_graph_chunk_processed} chunks processed upto {select_chunks_upto} completed in {processing_chunks_elapsed_end_time:.2f} seconds for file name {file_name}") @@ -439,7 +439,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages logging.error(error_message) raise Exception(error_message) -def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): +async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): #create vector index and update chunk node with embedding if graph is not None: if graph._driver._closed: @@ -456,7 +456,7 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab logging.info("Get graph document list from models") start_entity_extraction = time.time() - graph_documents = get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship) + graph_documents = await get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship) end_entity_extraction = time.time() elapsed_entity_extraction = end_entity_extraction - start_entity_extraction 
logging.info(f'Time taken to extract enitities from LLM Graph Builder: {elapsed_entity_extraction:.2f} seconds') From 943f5392ad377ddaf88445119b71be2a722476df Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 10 Oct 2024 12:29:12 +0530 Subject: [PATCH 157/292] fixed graph mode error (#792) --- backend/score.py | 5 +++-- backend/src/QA_integration.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/score.py b/backend/score.py index 2908c89ea..221e58b61 100644 --- a/backend/score.py +++ b/backend/score.py @@ -300,8 +300,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), graph = Neo4jGraph( url=uri,username=userName,password=password,database=database,sanitize = True, refresh_schema=True) else: graph = create_graph_database_connection(uri, userName, password, database) - graph_DB_dataAccess = graphDBdataAccess(graph) - write_access = graph_DB_dataAccess.check_account_access(database=database) + + graph_DB_dataAccess = graphDBdataAccess(graph) + write_access = graph_DB_dataAccess.check_account_access(database=database) result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode,write_access=write_access) total_call_time = time.time() - qa_rag_start_time diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 169d2b8c3..d0c0fa9f4 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -505,6 +505,7 @@ def create_graph_chain(model, graph): validate_cypher= True, graph=graph, # verbose=True, + allow_dangerous_requests=True, return_intermediate_steps = True, top_k=3 ) From 126dd4800b7e792f833b91f3954488ad2c51fac2 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:05:02 +0530 Subject: [PATCH 158/292] Raga's Evaluation Metrics (#787) * 
added Multi modes selection * ragas eval * added response * multimodes state mangement * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch statemangement * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * ragas evaluation metric show * Output return type changed * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code * Added ragas to requirements * Integrated the metric api * ragas response updated, llm list updated * resolved syntax error in score * Added the Metrics Table * fix: Long text UI Issue * code optimization for evaluation * added the download button for downloading the info * key name change * Optimized the downloadClickHandler --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna --- backend/requirements.txt | 1 + backend/score.py | 20 ++- backend/src/QA_integration.py | 43 +++-- backend/src/ragas_eval.py | 150 ++++++++++++++++++ .../src/components/ChatBot/ChatInfoModal.tsx | 87 ++++++---- frontend/src/components/ChatBot/Chatbot.tsx | 140 ++++++++++++++-- .../src/components/ChatBot/MetricsTab.tsx | 113 +++++++++++++ frontend/src/services/GetRagasMetric.ts | 17 ++ frontend/src/types.ts | 34 ++++ 9 files changed, 550 insertions(+), 55 deletions(-) create mode 100644 backend/src/ragas_eval.py create mode 100644 frontend/src/components/ChatBot/MetricsTab.tsx create mode 100644 frontend/src/services/GetRagasMetric.ts diff --git a/backend/requirements.txt b/backend/requirements.txt index 30b767939..8fc0e0bda 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -179,4 +179,5 @@ PyMuPDF==1.24.5 pypandoc==1.13 graphdatascience==1.10 Secweb==1.11.0 +ragas==0.1.14 diff --git a/backend/score.py b/backend/score.py 
index 221e58b61..920fa00f9 100644 --- a/backend/score.py +++ b/backend/score.py @@ -33,6 +33,8 @@ from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame +from src.ragas_eval import * + logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files") @@ -706,7 +708,23 @@ async def retry_processing(uri=Form(), userName=Form(), password=Form(), databas logging.exception(f'{error_message}') return create_api_response(job_status, message=message, error=error_message) finally: - gc.collect() + gc.collect() + +@app.post('/metric') +async def calculate_metric(question=Form(),context=Form(),answer=Form(),model=Form()): + try: + result = await asyncio.to_thread(get_ragas_metrics,question,context,answer,model) + if result is None: + return create_api_response('Failed', message='Failed to calculate metrics.',error="Ragas evaluation returned null") + return create_api_response('Success',data=result,message=f"Status set to Reprocess for filename : {result}") + except Exception as e: + job_status = "Failed" + message="Error while calculating evaluation metrics" + error_message = str(e) + logging.exception(f'{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index d0c0fa9f4..30375a9d9 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -22,7 +22,8 @@ from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage from langchain.chains import GraphCypherQAChain -from langchain_community.chat_message_histories import ChatMessageHistory +from langchain_community.chat_message_histories import ChatMessageHistory +from langchain_core.callbacks import StdOutCallbackHandler, 
BaseCallbackHandler # LangChain chat models from langchain_openai import ChatOpenAI, AzureChatOpenAI @@ -38,13 +39,12 @@ from src.shared.common_fn import load_embedding_model from src.shared.constants import * from src.graphDB_dataAccess import graphDBdataAccess +from src.ragas_eval import get_ragas_metrics load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) - - class SessionChatHistory: history_dict = {} @@ -58,6 +58,17 @@ def get_chat_history(cls, session_id): logging.info(f"Retrieved existing ChatMessageHistory Local for session ID: {session_id}") return cls.history_dict[session_id] +class CustomCallback(BaseCallbackHandler): + + def __init__(self): + self.transformed_question = None + + def on_llm_end( + self,response, **kwargs: Any + ) -> None: + logging.info("question transformed") + self.transformed_question = response.generations[0][0].text.strip() + def get_history_by_session_id(session_id): try: return SessionChatHistory.get_chat_history(session_id) @@ -250,13 +261,17 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings): logging.error(f"Error processing documents: {e}") raise - return content, result, total_tokens + return content, result, total_tokens, formatted_docs def retrieve_documents(doc_retriever, messages): start_time = time.time() try: - docs = doc_retriever.invoke({"messages": messages}) + handler = CustomCallback() + docs = doc_retriever.invoke({"messages": messages},{"callbacks":[handler]}) + transformed_question = handler.transformed_question + if transformed_question: + logging.info(f"Transformed question : {transformed_question}") doc_retrieval_time = time.time() - start_time logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") @@ -264,7 +279,7 @@ def retrieve_documents(doc_retriever, messages): logging.error(f"Error retrieving documents: {e}") raise - return docs + return docs,transformed_question def 
create_document_retriever_chain(llm, retriever): try: @@ -408,14 +423,19 @@ def process_chat_response(messages, history, question, model, graph, document_na try: llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings) - docs = retrieve_documents(doc_retriever, messages) + docs,transformed_question = retrieve_documents(doc_retriever, messages) if docs: - content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings) + content, result, total_tokens,formatted_docs = process_documents(docs, question, messages, llm, model, chat_mode_settings) else: content = "I couldn't find any relevant documents to answer your question." result = {"sources": list(), "nodedetails": list(), "entities": list()} total_tokens = 0 + formatted_docs = "" + + question = transformed_question if transformed_question else question + # metrics = get_ragas_metrics(question,formatted_docs,content) + # print(metrics) ai_response = AIMessage(content=content) messages.append(ai_response) @@ -424,11 +444,12 @@ def process_chat_response(messages, history, question, model, graph, document_na summarization_thread.start() logging.info("Summarization thread started.") # summarize_and_log(history, messages, llm) - + metric_details = {"question":question,"contexts":formatted_docs,"answer":content} return { "session_id": "", "message": content, "info": { + # "metrics" : metrics, "sources": result["sources"], "model": model_version, "nodedetails": result["nodedetails"], @@ -436,7 +457,9 @@ def process_chat_response(messages, history, question, model, graph, document_na "response_time": 0, "mode": chat_mode_settings["mode"], "entities": result["entities"], + "metric_details": metric_details, }, + "user": "chatbot" } @@ -446,6 +469,7 @@ def process_chat_response(messages, history, question, model, graph, document_na "session_id": "", "message": "Something went wrong", "info": { + "metrics" : [], "sources": [], "nodedetails": [], 
"total_tokens": 0, @@ -453,6 +477,7 @@ def process_chat_response(messages, history, question, model, graph, document_na "error": f"{type(e).__name__}: {str(e)}", "mode": chat_mode_settings["mode"], "entities": [], + "metric_details": {}, }, "user": "chatbot" } diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py new file mode 100644 index 000000000..940a809dc --- /dev/null +++ b/backend/src/ragas_eval.py @@ -0,0 +1,150 @@ +import os +import logging +import time +from typing import Dict, Tuple, Optional +import boto3 +from datasets import Dataset +from dotenv import load_dotenv +from langchain_anthropic import ChatAnthropic +from langchain_aws import ChatBedrock +from langchain_community.chat_models import ChatOllama +from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer +from langchain_fireworks import ChatFireworks +from langchain_google_vertexai import ( + ChatVertexAI, + HarmBlockThreshold, + HarmCategory, +) +from langchain_groq import ChatGroq +from langchain_openai import AzureChatOpenAI, ChatOpenAI +from ragas import evaluate +from ragas.metrics import answer_relevancy, context_utilization, faithfulness +from src.shared.common_fn import load_embedding_model + +load_dotenv() + +# Constants for clarity and maintainability +RAGAS_MODEL_VERSIONS = { + "openai-gpt-3.5": "gpt-3.5-turbo-16k", + "gemini-1.0-pro": "gemini-1.0-pro-001", + "gemini-1.5-pro": "gemini-1.5-pro-002", + "gemini-1.5-flash": "gemini-1.5-flash-002", + "openai-gpt-4": "gpt-4-turbo-2024-04-09", + "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", + "openai-gpt-4o": "gpt-4o-mini-2024-07-18", + "diffbot": "gpt-4-turbo-2024-04-09", + "groq-llama3": "groq_llama3_70b", +} + +EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL") +EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL) + + +def get_ragas_llm(model: str) -> Tuple[object, str]: + """Retrieves the specified language model. 
Improved error handling and structure.""" + env_key = f"LLM_MODEL_CONFIG_{model}" + env_value = os.environ.get(env_key) + logging.info(f"Loading model configuration: {env_key}") + try: + if "gemini" in model: + credentials, project_id = google.auth.default() + model_name = RAGAS_MODEL_VERSIONS[model] + llm = ChatVertexAI( + model_name=model_name, + credentials=credentials, + project=project_id, + temperature=0, + safety_settings={ + #setting safety to NONE for all categories. Consider reviewing this for production systems + HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE, + HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, + HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, + HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, + HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, + }, + ) + elif "openai" in model: + model_name = RAGAS_MODEL_VERSIONS[model] + llm = ChatOpenAI( + api_key=os.environ.get("OPENAI_API_KEY"), model=model_name, temperature=0 + ) + + elif "azure" in model: + model_name, api_endpoint, api_key, api_version = env_value.split(",") + llm = AzureChatOpenAI( + api_key=api_key, + azure_endpoint=api_endpoint, + azure_deployment=model_name, + api_version=api_version, + temperature=0, + ) + elif "anthropic" in model: + model_name, api_key = env_value.split(",") + llm = ChatAnthropic(api_key=api_key, model=model_name, temperature=0) + elif "fireworks" in model: + model_name, api_key = env_value.split(",") + llm = ChatFireworks(api_key=api_key, model=model_name) + elif "groq" in model: + model_name, base_url, api_key = env_value.split(",") + llm = ChatGroq(api_key=api_key, model_name=model_name, temperature=0) + elif "bedrock" in model: + model_name, aws_access_key, aws_secret_key, region_name = env_value.split(",") + bedrock_client = boto3.client( + service_name="bedrock-runtime", + region_name=region_name, + 
aws_access_key_id=aws_access_key, + aws_secret_access_key=aws_secret_key, + ) + llm = ChatBedrock( + client=bedrock_client, model_id=model_name, model_kwargs=dict(temperature=0) + ) + elif "ollama" in model: + model_name, base_url = env_value.split(",") + llm = ChatOllama(base_url=base_url, model=model_name) + elif "diffbot" in model: + llm = DiffbotGraphTransformer( + diffbot_api_key=os.environ.get("DIFFBOT_API_KEY"), + extract_types=["entities", "facts"], + ) + else: + raise ValueError(f"Unsupported model: {model}") + + logging.info(f"Model loaded - Model Version: {model}") + return llm, model_name + except (ValueError, KeyError) as e: + logging.error(f"Error loading LLM: {e}") + raise + + +def get_ragas_metrics( + question: str, context: str, answer: str, model: str +) -> Optional[Dict[str, float]]: + """Calculates RAGAS metrics.""" + try: + start_time = time.time() + dataset = Dataset.from_dict( + {"question": [question], "answer": [answer], "contexts": [[context]]} + ) + logging.info("Dataset created successfully.") + + llm, model_name = get_ragas_llm(model=model) + logging.info(f"Evaluating with model: {model_name}") + + score = evaluate( + dataset=dataset, + metrics=[faithfulness, answer_relevancy, context_utilization], + llm=llm, + embeddings=EMBEDDING_FUNCTION, + ) + + score_dict = ( + score.to_pandas()[["faithfulness", "answer_relevancy", "context_utilization"]] + .round(4) + .to_dict(orient="records")[0] + ) + end_time = time.time() + logging.info(f"Evaluation completed in: {end_time - start_time:.2f} seconds") + return score_dict + except Exception as e: + logging.exception(f"Error during metrics evaluation: {e}") + return None \ No newline at end of file diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 894833884..199d9eb6d 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -12,15 +12,7 @@ import { import { 
DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; -import { - Chunk, - Community, - Entity, - ExtendedNode, - ExtendedRelationship, - UserCredentials, - chatInfoMessage, -} from '../../types'; +import { Entity, ExtendedNode, UserCredentials, chatInfoMessage } from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; @@ -33,6 +25,8 @@ import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './Communities'; import { chatModeLables } from '../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; +import { getChatMetrics } from '../../services/GetRagasMetric'; +import MetricsTab from './MetricsTab'; const ChatInfoModal: React.FC = ({ sources, @@ -45,19 +39,33 @@ const ChatInfoModal: React.FC = ({ graphonly_entities, error, entities_ids, + metricanswer, + metriccontexts, + metricquestion, + metricmodel, + saveNodes, + saveChunks, + saveChatRelationships, + saveCommunities, + saveInfoEntitites, + saveMetrics, + toggleInfoLoading, + toggleMetricsLoading, + nodes, + chunks, + infoEntities, + communities, + metricDetails, + relationships, + infoLoading, + metricsLoading, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const [activeTab, setActiveTab] = useState( error?.length ? 10 : mode === chatModeLables.global_vector ? 7 : mode === chatModeLables.graph ? 
4 : 3 ); - const [infoEntities, setInfoEntities] = useState([]); - const [communities, setCommunities] = useState([]); - const [loading, setLoading] = useState(false); const { userCredentials } = useCredentials(); - const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); - const [chunks, setChunks] = useState([]); const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); @@ -88,7 +96,7 @@ const ChatInfoModal: React.FC = ({ useEffect(() => { if (mode != chatModeLables.graph || error?.trim() !== '') { (async () => { - setLoading(true); + toggleInfoLoading(); try { const response = await chunkEntitiesAPI( userCredentials as UserCredentials, @@ -110,7 +118,7 @@ const ChatInfoModal: React.FC = ({ const communitiesData = response?.data?.data?.community_data; const chunksData = response?.data?.data?.chunk_data; - setInfoEntities( + saveInfoEntitites( nodesData.map((n: Entity) => { if (!n.labels.length && mode === chatModeLables.entity_vector) { return { @@ -121,7 +129,7 @@ const ChatInfoModal: React.FC = ({ return n; }) ); - setNodes( + saveNodes( nodesData.map((n: ExtendedNode) => { if (!n.labels.length && mode === chatModeLables.entity_vector) { return { @@ -132,8 +140,8 @@ const ChatInfoModal: React.FC = ({ return n ?? []; }) ); - setRelationships(relationshipsData ?? []); - setCommunities( + saveChatRelationships(relationshipsData ?? 
[]); + saveCommunities( (communitiesData || []) .map((community: { element_id: string }) => { const communityScore = nodeDetails?.communitydetails?.find( @@ -147,7 +155,7 @@ const ChatInfoModal: React.FC = ({ .sort((a: any, b: any) => b.score - a.score) ); - setChunks( + saveChunks( chunksData .map((chunk: any) => { const chunkScore = nodeDetails?.chunkdetails?.find((c: any) => c.id === chunk.id); @@ -158,13 +166,31 @@ const ChatInfoModal: React.FC = ({ }) .sort((a: any, b: any) => b.score - a.score) ); - setLoading(false); + toggleInfoLoading(); } catch (error) { console.error('Error fetching information:', error); - setLoading(false); + toggleInfoLoading(); } })(); } + (async () => { + try { + toggleMetricsLoading(); + const response = await getChatMetrics(metricquestion, metriccontexts, metricanswer, metricmodel); + toggleMetricsLoading(); + if (response.data.status === 'Success') { + saveMetrics({ ...response.data.data, error: '' }); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + toggleMetricsLoading(); + console.log('Error in getting chat metrics', error); + saveMetrics({ error: error.message, faithfulness: 0, answer_relevancy: 0, context_utilization: 0 }); + } + } + })(); () => { setcopiedText(false); }; @@ -185,7 +211,7 @@ const ChatInfoModal: React.FC = ({ Retrieval information - To generate this response, the process took {response_time} seconds,{' '} + To generate this response, the process took {response_time} seconds, utilizing {total_tokens} tokens with the model{' '} {model} in{' '} {mode !== 'vector' ? mode.replace(/\+/g, ' & ') : mode} mode. @@ -197,7 +223,6 @@ const ChatInfoModal: React.FC = ({ ) : ( {mode === chatModeLables.global_vector ? ( - // Only show the Communities tab if mode is global Communities ) : ( <> @@ -217,24 +242,28 @@ const ChatInfoModal: React.FC = ({ <> )} {mode === chatModeLables.entity_vector ? 
Communities : <>} + Evaluation Metrics )} )} - + + + + - + = ({ {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? ( - + ) : ( <> diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index e075777db..f085fb696 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -1,4 +1,4 @@ -import React, { FC, lazy, Suspense, useCallback, useEffect, useRef, useState } from 'react'; +import React, { FC, lazy, Suspense, useCallback, useEffect, useReducer, useRef, useState } from 'react'; import { Widget, Typography, @@ -9,10 +9,24 @@ import { useCopyToClipboard, Flex, Box, + TextLink, } from '@neo4j-ndl/react'; -import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; +import { ArrowDownTrayIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; import ChatBotAvatar from '../../assets/images/chatbot-ai.png'; -import { ChatbotProps, CustomFile, Messages, ResponseMode, UserCredentials, nodeDetailsProps } from '../../types'; +import { + ChatbotProps, + Chunk, + Community, + CustomFile, + Entity, + ExtendedNode, + ExtendedRelationship, + Messages, + MetricsState, + ResponseMode, + UserCredentials, + nodeDetailsProps, +} from '../../types'; import { useCredentials } from '../../context/UserCredentials'; import { chatBotAPI } from '../../services/QnaAPI'; import { v4 as uuidv4 } from 'uuid'; @@ -54,6 +68,19 @@ const Chatbot: FC = (props) => { const [messageError, setmessageError] = useState(''); const [entitiesModal, setEntitiesModal] = useState([]); const [nodeDetailsModal, setNodeDetailsModal] = useState({}); + const [metricQuestion, setMetricQuestion] = useState(''); + const [metricAnswer, setMetricAnswer] = useState(''); + const [metricContext, setMetricContext] = useState(''); + const [nodes, setNodes] = useState([]); + const [relationships, setRelationships] = useState([]); + const [chunks, setChunks] = useState([]); + const 
[metricDetails, setMetricDetails] = useState(null); + const [infoEntities, setInfoEntities] = useState([]); + const [communities, setCommunities] = useState([]); + const [infoLoading, toggleInfoLoading] = useReducer((s) => !s, false); + const [metricsLoading, toggleMetricsLoading] = useReducer((s) => !s, false); + const downloadLinkRef = useRef(null); + const [activeChat, setActiveChat] = useState(); const [_, copy] = useCopyToClipboard(); const { speak, cancel, speaking } = useSpeechSynthesis({ @@ -70,6 +97,27 @@ const Chatbot: FC = (props) => { setInputMessage(e.target.value); }; + const saveInfoEntitites = (entities: Entity[]) => { + setInfoEntities(entities); + }; + + const saveNodes = (chatNodes: ExtendedNode[]) => { + setNodes(chatNodes); + }; + + const saveChatRelationships = (chatRels: ExtendedRelationship[]) => { + setRelationships(chatRels); + }; + + const saveChunks = (chatChunks: Chunk[]) => { + setChunks(chatChunks); + }; + const saveMetrics = (metricInfo: MetricsState) => { + setMetricDetails(metricInfo); + }; + const saveCommunities = (chatCommunities: Community[]) => { + setCommunities(chatCommunities); + }; useEffect(() => { if (!sessionStorage.getItem('session_id')) { const id = uuidv4(); @@ -173,7 +221,6 @@ const Chatbot: FC = (props) => { const results = await Promise.allSettled(apiCalls); results.forEach((result, index) => { const mode = chatModes[index]; - console.log({ mode }); if (result.status === 'fulfilled') { // @ts-ignore if (result.value.response.data.status === 'Success') { @@ -189,13 +236,16 @@ const Chatbot: FC = (props) => { entities: response.info.entities, nodeDetails: response.info.nodedetails, error: response.info.error, + metric_question: response.info.metric_details.question, + metric_answer: response.info.metric_details.answer, + metric_contexts: response.info.metric_details.contexts, }; if (index === 0) { simulateTypingEffect(chatbotMessageId, responseMode, mode, responseMode.message); } else { setListMessages((prev) => 
prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -210,7 +260,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -219,7 +269,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, modes: { @@ -227,21 +277,20 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg) + : msg ) ); } }); - - setListMessages((prev) => { - return prev.map((msg) => (msg.id === chatbotMessageId ? { ...msg, isLoading: false, isTyping: false } : msg)); - }); + setListMessages((prev) => + prev.map((msg) => (msg.id === chatbotMessageId ? { ...msg, isLoading: false, isTyping: false } : msg)) + ); } catch (error) { console.error('Error in handling chat:', error); if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -253,7 +302,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg) + : msg ) ); } @@ -338,6 +387,10 @@ const Chatbot: FC = (props) => { setEntitiesModal(currentMode.entities ?? []); setmessageError(currentMode.error ?? ''); setNodeDetailsModal(currentMode.nodeDetails ?? {}); + setMetricQuestion(currentMode.metric_question ?? ''); + setMetricContext(currentMode.metric_contexts ?? ''); + setMetricAnswer(currentMode.metric_answer ?? 
''); + setActiveChat(chat); }, []); const speechHandler = useCallback((chat: Messages) => { @@ -347,6 +400,15 @@ const Chatbot: FC = (props) => { handleSpeak(chat.modes[chat.currentMode]?.message, chat.id); } }, []); + const downloadClickHandler = useCallback(function downloadClickHandler(JsonData: Type) { + const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); + if (downloadLinkRef && downloadLinkRef.current) { + downloadLinkRef.current.href = URL.createObjectURL(textFile); + downloadLinkRef.current.download = 'data.json'; + downloadLinkRef.current.click(); + } + }, []); + return (
    @@ -399,7 +461,9 @@ const Chatbot: FC = (props) => { chat.isLoading && index === listMessages.length - 1 && chat.user === 'chatbot' ? 'loader' : '' }`} > - {chat.modes[chat.currentMode]?.message || ''} + + {chat.modes[chat.currentMode]?.message || ''} +
    @@ -509,7 +573,31 @@ const Chatbot: FC = (props) => { onClose={() => setShowInfoModal(false)} open={showInfoModal} > -
    +
    + { + downloadClickHandler({ + chatResponse: activeChat, + chunks, + metricDetails, + communities, + responseTime, + entities: infoEntities, + nodes, + tokensUsed, + model, + }); + }} + > + + + "" + + = (props) => { graphonly_entities={graphEntitites} error={messageError} nodeDetails={nodeDetailsModal} + metricanswer={metricAnswer} + metriccontexts={metricContext} + metricquestion={metricQuestion} + metricmodel={model} + nodes={nodes} + infoEntities={infoEntities} + relationships={relationships} + chunks={chunks} + metricDetails={metricDetails} + communities={communities} + infoLoading={infoLoading} + metricsLoading={metricsLoading} + saveInfoEntitites={saveInfoEntitites} + saveChatRelationships={saveChatRelationships} + saveChunks={saveChunks} + saveCommunities={saveCommunities} + saveMetrics={saveMetrics} + saveNodes={saveNodes} + toggleInfoLoading={toggleInfoLoading} + toggleMetricsLoading={toggleMetricsLoading} /> diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx new file mode 100644 index 000000000..a5680b8ad --- /dev/null +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -0,0 +1,113 @@ +import { Banner, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; +import { MetricsState } from '../../types'; +import { memo, useMemo, useRef } from 'react'; +import { + useReactTable, + getCoreRowModel, + createColumnHelper, + getFilteredRowModel, + getPaginationRowModel, + getSortedRowModel, +} from '@tanstack/react-table'; +import { capitalize } from '../../utils/Utils'; +function MetricsTab({ + metricsLoading, + metricDetails, +}: { + metricsLoading: boolean; + metricDetails: MetricsState | null; +}) { + const columnHelper = createColumnHelper<{ metric: string; score: number }>(); + const tableRef = useRef(null); + + const columns = useMemo( + () => [ + columnHelper.accessor((row) => row.metric, { + id: 'Metric', + cell: (info) => { + const metric = info.getValue(); + const 
capitilizedMetric = metric.includes('_') + ? metric + .split('_') + .map((w) => capitalize(w)) + .join(' ') + : capitalize(metric); + return ( +
    + {capitilizedMetric} +
    + ); + }, + header: () => Metric, + footer: (info) => info.column.id, + }), + columnHelper.accessor((row) => row.score, { + id: 'Score', + cell: (info) => { + return {info.getValue().toFixed(2)}; + }, + }), + ], + [] + ); + const table = useReactTable({ + data: + metricDetails != null && !metricsLoading + ? Object.entries(metricDetails) + .slice(0, Object.keys(metricDetails).length - 1) + .map(([key, value]) => { + return { metric: key, score: value }; + }) + : [], + columns, + getCoreRowModel: getCoreRowModel(), + getFilteredRowModel: getFilteredRowModel(), + getPaginationRowModel: getPaginationRowModel(), + enableGlobalFilter: false, + autoResetPageIndex: false, + enableRowSelection: true, + enableMultiRowSelection: true, + enableSorting: true, + getSortedRowModel: getSortedRowModel(), + }); + return ( + <> + {metricDetails != null && metricDetails?.error?.trim() != '' ? ( + {metricDetails?.error} + ) : ( + , + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + /> + )} + + ); +} +export default memo(MetricsTab); diff --git a/frontend/src/services/GetRagasMetric.ts b/frontend/src/services/GetRagasMetric.ts new file mode 100644 index 000000000..a72c69e5d --- /dev/null +++ b/frontend/src/services/GetRagasMetric.ts @@ -0,0 +1,17 @@ +import { MetricsResponse } from '../types'; +import api from '../API/Index'; + +export const getChatMetrics = async (question: string, context: string, answer: string, model: string) => { + const formData = new FormData(); + formData.append('question', question); + formData.append('context', `[${context}]`); + formData.append('answer', answer); + formData.append('model', model); + try { + const response = await api.post(`/metric`, formData); + return response; + } catch (error) { + console.log(error); + throw error; + } +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index eb0d6c090..283227bcb 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -218,6 
+218,9 @@ export type ResponseMode = { graphonly_entities?: []; error?: string; entities?: string[]; + metric_question?: string; + metric_contexts?: string; + metric_answer?: string; }; export interface Messages { id: number; @@ -407,6 +410,14 @@ export interface duplicateNodesData extends Partial { export interface OrphanNodeResponse extends Partial { data: orphanNodeProps[]; } +export type metricdetails = { + faithfulness: number; + answer_relevancy: number; + context_utilization: number; +}; +export interface MetricsResponse extends Omit { + data: metricdetails; +} export interface schema { nodelabels: string[]; relationshipTypes: string[]; @@ -430,6 +441,26 @@ export interface chatInfoMessage extends Partial { error: string; entities_ids: string[]; nodeDetails: nodeDetailsProps; + metricquestion: string; + metricanswer: string; + metriccontexts: string; + metricmodel: string; + nodes: ExtendedNode[]; + relationships: ExtendedRelationship[]; + chunks: Chunk[]; + metricDetails: MetricsState | null; + infoEntities: Entity[]; + communities: Community[]; + infoLoading: boolean; + metricsLoading: boolean; + saveInfoEntitites: (entities: Entity[]) => void; + saveNodes: (chatNodes: ExtendedNode[]) => void; + saveChatRelationships: (chatRels: ExtendedRelationship[]) => void; + saveChunks: (chatChunks: Chunk[]) => void; + saveMetrics: (metricInfo: MetricsState) => void; + saveCommunities: (chatCommunities: Community[]) => void; + toggleInfoLoading: React.DispatchWithoutAction; + toggleMetricsLoading: React.DispatchWithoutAction; } export interface eventResponsetypes extends Omit { @@ -744,3 +775,6 @@ export interface FileContextType { setPostProcessingVal: Dispatch>; } export declare type Side = 'top' | 'right' | 'bottom' | 'left'; +export interface MetricsState extends metricdetails { + error?: string; +} From b8296e94a6516341822fe95a92a1cdb21fc0688b Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Thu, 10 Oct 2024 
15:58:44 +0530 Subject: [PATCH 159/292] Openai gemini config (#794) * openai and gemini models as config backend * updated dropdown llm values * updated docs --- README.md | 2 -- backend/example.env | 5 +++++ backend/src/llm.py | 14 ++++++++----- backend/src/main.py | 36 ++++++++++++++++++--------------- example.env | 4 ---- frontend/src/utils/Constants.ts | 10 ++++----- 6 files changed, 39 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index f1ef12198..01342a8a4 100644 --- a/README.md +++ b/README.md @@ -127,8 +127,6 @@ Allow unauthenticated request : Yes ## ENV | Env Variable Name | Mandatory/Optional | Default Value | Description | |-------------------------|--------------------|---------------|--------------------------------------------------------------------------------------------------| -| OPENAI_API_KEY | Mandatory | | API key for OpenAI | -| DIFFBOT_API_KEY | Mandatory | | API key for Diffbot | | EMBEDDING_MODEL | Optional | all-MiniLM-L6-v2 | Model for generating the text embedding (all-MiniLM-L6-v2 , openai , vertexai) | | IS_EMBEDDING | Optional | true | Flag to enable text embedding | | KNN_MIN_SCORE | Optional | 0.94 | Minimum score for KNN algorithm | diff --git a/backend/example.env b/backend/example.env index 1d14cfae4..75b817220 100644 --- a/backend/example.env +++ b/backend/example.env @@ -28,6 +28,11 @@ ENTITY_EMBEDDING="" True or False DUPLICATE_SCORE_VALUE = "" DUPLICATE_TEXT_DISTANCE = "" #examples +LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" +LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" +LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" +LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" +LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version" LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version" 
LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key" diff --git a/backend/src/llm.py b/backend/src/llm.py index c7c6b36e2..f43be2a67 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -24,9 +24,11 @@ def get_llm(model: str): env_key = "LLM_MODEL_CONFIG_" + model env_value = os.environ.get(env_key) logging.info("Model: {}".format(env_key)) + if "gemini" in model: + model_name = env_value credentials, project_id = google.auth.default() - model_name = MODEL_VERSIONS[model] + #model_name = MODEL_VERSIONS[model] llm = ChatVertexAI( model_name=model_name, #convert_system_message_to_human=True, @@ -42,9 +44,10 @@ def get_llm(model: str): }, ) elif "openai" in model: - model_name = MODEL_VERSIONS[model] + #model_name = MODEL_VERSIONS[model] + model_name, api_key = env_value.split(",") llm = ChatOpenAI( - api_key=os.environ.get("OPENAI_API_KEY"), + api_key=api_key, model=model_name, temperature=0, ) @@ -93,9 +96,10 @@ def get_llm(model: str): llm = ChatOllama(base_url=base_url, model=model_name) elif "diffbot" in model: - model_name = "diffbot" + #model_name = "diffbot" + model_name, api_key = env_value.split(",") llm = DiffbotGraphTransformer( - diffbot_api_key=os.environ.get("DIFFBOT_API_KEY"), + diffbot_api_key=api_key, extract_types=["entities", "facts"], ) diff --git a/backend/src/main.py b/backend/src/main.py index 8b95b2e88..425a24451 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -518,26 +518,30 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): else: chunkId_chunkDoc_list=[] chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename":file_name}) - for chunk in chunks: - chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']}) - chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc}) - if retry_condition == START_FROM_LAST_PROCESSED_POSITION: - logging.info(f"Retry : start_from_last_processed_position") - starting_chunk = 
graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name}) - if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): - return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + if chunks[0]['text'] is None or chunks[0]['text']=="" : + raise Exception(f"Chunks are not created for {file_name}. Please re-upload file and try.") + else: + for chunk in chunks: + chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']}) + chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc}) - elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): - starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name}) - return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + if retry_condition == START_FROM_LAST_PROCESSED_POSITION: + logging.info(f"Retry : start_from_last_processed_position") + starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name}) + if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): + return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + + elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): + starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name}) + return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] + + else: + raise Exception(f"All chunks of {file_name} are alreday processed. If you want to re-process, Please start from begnning") else: - raise Exception(f"All chunks of {file_name} are alreday processed. 
If you want to re-process, Please start from begnning") - - else: - logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}") - return len(chunks), chunkId_chunkDoc_list + logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}") + return len(chunks), chunkId_chunkDoc_list def get_source_list_from_graph(uri,userName,password,db_name=None): """ diff --git a/example.env b/example.env index 23bcc6e06..227c51904 100644 --- a/example.env +++ b/example.env @@ -1,7 +1,3 @@ -# Mandatory -OPENAI_API_KEY="" -DIFFBOT_API_KEY="" - # Optional Backend EMBEDDING_MODEL="all-MiniLM-L6-v2" IS_EMBEDDING="true" diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index f740531d0..47a19e701 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -41,11 +41,11 @@ export const llms = ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ 'diffbot', - 'openai-gpt-3.5', - 'openai-gpt-4o', - 'openai-gpt-4o-mini', - 'gemini-1.5-pro', - 'gemini-1.5-flash', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', 'azure_ai_gpt_35', 'azure_ai_gpt_4o', 'ollama_llama3', From b04f3828da4d8f099676389474e1c5eb99078882 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:52:37 +0000 Subject: [PATCH 160/292] Added the user action for metrics table --- .../src/components/ChatBot/ChatInfoModal.tsx | 53 ++++++++++++------- frontend/src/components/ChatBot/Chatbot.tsx | 12 ++--- .../src/components/ChatBot/MetricsTab.tsx | 6 +-- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 199d9eb6d..c666d5206 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -8,6 +8,7 @@ import { 
useCopyToClipboard, Banner, useMediaQuery, + Button, } from '@neo4j-ndl/react'; import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; @@ -27,6 +28,7 @@ import { chatModeLables } from '../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; +import { Stack } from '@mui/material'; const ChatInfoModal: React.FC = ({ sources, @@ -69,6 +71,7 @@ const ChatInfoModal: React.FC = ({ const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); + const [showMetricsTable, setShowMetricsTable] = useState(false); const actions: CypherCodeBlockProps['actions'] = useMemo( () => [ @@ -173,32 +176,34 @@ const ChatInfoModal: React.FC = ({ } })(); } - (async () => { - try { - toggleMetricsLoading(); - const response = await getChatMetrics(metricquestion, metriccontexts, metricanswer, metricmodel); - toggleMetricsLoading(); - if (response.data.status === 'Success') { - saveMetrics({ ...response.data.data, error: '' }); - } else { - throw new Error(response.data.error); - } - } catch (error) { - if (error instanceof Error) { - toggleMetricsLoading(); - console.log('Error in getting chat metrics', error); - saveMetrics({ error: error.message, faithfulness: 0, answer_relevancy: 0, context_utilization: 0 }); - } - } - })(); () => { setcopiedText(false); + setShowMetricsTable(false); }; }, [nodeDetails, mode, error]); const onChangeTabs = (tabId: number) => { setActiveTab(tabId); }; + const loadMetrics = async () => { + setShowMetricsTable(true); + try { + toggleMetricsLoading(); + const response = await getChatMetrics(metricquestion, metriccontexts, metricanswer, metricmodel); + toggleMetricsLoading(); + if (response.data.status === 'Success') { + saveMetrics({ ...response.data.data, error: '' }); + } else { 
+ throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + toggleMetricsLoading(); + console.log('Error in getting chat metrics', error); + saveMetrics({ error: error.message, faithfulness: 0, answer_relevancy: 0, context_utilization: 0 }); + } + } + }; return ( @@ -252,7 +257,17 @@ const ChatInfoModal: React.FC = ({ - + + + We use several key metrics to assess the quality of our chat responses. Click the button below to view + detailed scores for this interaction. These scores help us continuously improve the accuracy and + helpfulness of our chatbots. + + {showMetricsTable && } + + = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -260,7 +260,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -269,7 +269,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, modes: { @@ -277,7 +277,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg + : msg) ) ); } @@ -290,7 +290,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? 
{ ...msg, isLoading: false, @@ -302,7 +302,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg + : msg) ) ); } diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index a5680b8ad..17b5e67ae 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -1,4 +1,4 @@ -import { Banner, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; +import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; import { MetricsState } from '../../types'; import { memo, useMemo, useRef } from 'react'; import { @@ -71,7 +71,7 @@ function MetricsTab({ getSortedRowModel: getSortedRowModel(), }); return ( - <> + {metricDetails != null && metricDetails?.error?.trim() != '' ? ( {metricDetails?.error} ) : ( @@ -107,7 +107,7 @@ function MetricsTab({ }} /> )} - + ); } export default memo(MetricsTab); From f97806c4375ca81e2f1c5cce8204772472aee9ca Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 10 Oct 2024 18:05:12 +0530 Subject: [PATCH 161/292] Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes --- .../components/Graph/CheckboxSelection.tsx | 4 +- .../components/Graph/GraphPropertiesPanel.tsx | 91 +++++ .../components/Graph/GraphPropertiesTable.tsx | 39 +++ .../src/components/Graph/GraphViewModal.tsx | 322 +++++------------- frontend/src/components/Graph/LegendsChip.tsx | 2 +- frontend/src/components/Graph/ResizePanel.tsx | 56 +++ .../src/components/Graph/ResultOverview.tsx | 197 +++++++++++ frontend/src/types.ts | 55 ++- frontend/src/utils/Constants.ts | 233 ++++++------- frontend/src/utils/Utils.ts | 53 +-- 10 files changed, 646 insertions(+), 406 deletions(-) create mode 100644 frontend/src/components/Graph/GraphPropertiesPanel.tsx create mode 100644 
frontend/src/components/Graph/GraphPropertiesTable.tsx create mode 100644 frontend/src/components/Graph/ResizePanel.tsx create mode 100644 frontend/src/components/Graph/ResultOverview.tsx diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index d0a3429f2..b8caf7484 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -7,7 +7,7 @@ const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, - isgds, + isCommunity, isDocChunk, isEntity, }) => ( @@ -29,7 +29,7 @@ const CheckboxSelection: React.FC = ({ onChange={() => handleChange('Entities')} /> )} - {isgds && ( + {isCommunity && ( a.toLowerCase().localeCompare(b.toLowerCase()); + +const isNode = (item: BasicNode | BasicRelationship): item is BasicNode => { + return 'labels' in item && !('from' in item) && !('to' in item); +}; + +const GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanelProps) => { + const inspectedItemType = isNode(inspectedItem) ? 'node' : 'relationship'; + const properties = inspectedItemType === 'node' + ? [ + { + key: '', + value: `${(inspectedItem as BasicNode).id}`, + type: 'String', + }, + ...Object.keys((inspectedItem as BasicNode).properties).map((key) => { + const value = (inspectedItem as BasicNode).properties[key]; + return { key: key, value: value ?? '' }; + }), + ] + : [ + { + key: '', + value: `${(inspectedItem as BasicRelationship).id}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).from}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).to}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).caption ?? 
''}`, + type: 'String', + }, + ]; + const labelsSorted = useMemo(() => { + if (isNode(inspectedItem)) { + return [...inspectedItem.labels].sort(sortAlphabetically); + } + return []; + }, [inspectedItem]); + + return ( + <> + +
    + {inspectedItemType === 'node' ? 'Node details' : 'Relationship details'} +
    +
    + +
    + {isNode(inspectedItem) ? ( + labelsSorted.map((label) => ( + + )) + ) : ( + + )} +
    +
    + + + + ); +} + +export default GraphPropertiesPanel; \ No newline at end of file diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx new file mode 100644 index 000000000..8cd7571c7 --- /dev/null +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -0,0 +1,39 @@ +import { GraphLabel, Typography } from '@neo4j-ndl/react'; +import { GraphPropertiesTableProps } from '../../types'; + +const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps): JSX.Element => { + console.log('props', propertiesWithTypes); + return ( +
    +
    + + Key + + Value +
    + {propertiesWithTypes.map(({ key, value }, _) => { + return ( +
    +
    + + {key} + +
    +
    + {value} +
    +
    + ); + })} +
    + ); +}; + +export default GraphPropertiesTable; \ No newline at end of file diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 9dc561553..cb995a014 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -2,15 +2,15 @@ import { Banner, Dialog, Flex, - IconButton, IconButtonArray, LoadingSpinner, - TextInput, - Typography, useDebounce, } from '@neo4j-ndl/react'; -import { useCallback, useEffect, useRef, useState } from 'react'; +import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { + BasicNode, + BasicRelationship, + EntityType, ExtendedNode, ExtendedRelationship, GraphType, @@ -21,30 +21,29 @@ import { import { InteractiveNvlWrapper } from '@neo4j-nvl/react'; import NVL from '@neo4j-nvl/base'; import type { Node, Relationship } from '@neo4j-nvl/base'; -import { Resizable } from 're-resizable'; import { ArrowPathIconOutline, - DragIcon, FitToScreenIcon, - MagnifyingGlassIconOutline, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; -import { filterData, getCheckboxConditions, processGraphData, sortAlphabetically } from '../../utils/Utils'; +import { filterData, getCheckboxConditions, graphTypeFromNodes, processGraphData } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; -import { LegendsChip } from './LegendsChip'; + import graphQueryAPI from '../../services/GraphQuery'; import { graphLabels, intitalGraphType, - mouseEventCallbacks, nvlOptions, queryMap, - RESULT_STEP_SIZE, } from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; -import { ShowAll } from '../UI/ShowAll'; + +import ResultOverview from './ResultOverview'; +import { ResizePanelDetails } from './ResizePanel'; +import GraphPropertiesPanel from 
'./GraphPropertiesPanel'; + const GraphViewModal: React.FunctionComponent = ({ open, @@ -57,51 +56,29 @@ const GraphViewModal: React.FunctionComponent = ({ }) => { const nvlRef = useRef(null); const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); + const [relationships, setRelationships] = useState([]); const [allNodes, setAllNodes] = useState([]); const [allRelationships, setAllRelationships] = useState([]); const [loading, setLoading] = useState(false); const [status, setStatus] = useState<'unknown' | 'success' | 'danger'>('unknown'); const [statusMessage, setStatusMessage] = useState(''); - const { userCredentials, isGdsActive } = useCredentials(); + const { userCredentials } = useCredentials(); const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); const debouncedQuery = useDebounce(searchQuery, 300); - const [graphType, setGraphType] = useState(intitalGraphType(isGdsActive)); + const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); - - // the checkbox selection - const handleCheckboxChange = (graph: GraphType) => { - const currentIndex = graphType.indexOf(graph); - const newGraphSelected = [...graphType]; - if (currentIndex === -1) { - newGraphSelected.push(graph); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); - } else { - newGraphSelected.splice(currentIndex, 1); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); - } - setSearchQuery(''); - setGraphType(newGraphSelected); - }; - - const nodeCount = (nodes: ExtendedNode[], label: string): number => { - return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; - }; - - const relationshipCount = (relationships: ExtendedRelationship[], label: string): number => { - return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => 
i.id))].length; - }; + const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -110,7 +87,7 @@ const GraphViewModal: React.FunctionComponent = ({ {} ); }; - +console.log('graphType', graphType); // Unmounting the component useEffect(() => { const timeoutId = setTimeout(() => { @@ -120,7 +97,7 @@ const GraphViewModal: React.FunctionComponent = ({ if (nvlRef.current) { nvlRef.current?.destroy(); } - setGraphType(intitalGraphType(isGdsActive)); + setGraphType([]); clearTimeout(timeoutId); setScheme({}); setNodes([]); @@ -128,18 +105,26 @@ const GraphViewModal: React.FunctionComponent = ({ setAllNodes([]); setAllRelationships([]); setSearchQuery(''); + setSelected(undefined); }; }, []); + useEffect(() => { + const updateGraphType = graphTypeFromNodes(allNodes); + if (Array.isArray(updateGraphType)) { + setGraphType(updateGraphType); + } + }, [allNodes]) + const fetchData = useCallback(async () => { try { const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? 
'']); return nodeRelationshipData; } catch (error: any) { @@ -188,7 +173,7 @@ const GraphViewModal: React.FunctionComponent = ({ useEffect(() => { if (open) { setLoading(true); - setGraphType(intitalGraphType(isGdsActive)); + setGraphType([]); if (viewPoint !== 'chatInfoView') { graphApi(); } else { @@ -202,7 +187,7 @@ const GraphViewModal: React.FunctionComponent = ({ setLoading(false); } } - }, [open, isGdsActive]); + }, [open]); useEffect(() => { handleSearch(debouncedQuery); @@ -220,7 +205,6 @@ const GraphViewModal: React.FunctionComponent = ({ finalNodes ?? [], finalRels ?? [], schemeVal, - isGdsActive ); setNodes(filteredNodes); setRelationships(filteredRelations); @@ -228,6 +212,17 @@ const GraphViewModal: React.FunctionComponent = ({ } }; + + const selectedItem = useMemo(() => { + if (selected === undefined) { + return undefined; + } + if (selected.type === 'node') { + return nodes.find((node) => node.id === selected.id); + } + return relationships.find((relationship) => relationship.id === selected.id); + }, [selected, relationships, nodes]); + // The search and update nodes const handleSearch = useCallback( (value: string) => { @@ -236,7 +231,6 @@ const GraphViewModal: React.FunctionComponent = ({ if (query === '') { return { ...node, - activated: false, selected: false, size: graphLabels.nodeSize, }; @@ -246,21 +240,19 @@ const GraphViewModal: React.FunctionComponent = ({ const match = id.toLowerCase().includes(query) || propertiesMatch || caption?.toLowerCase().includes(query); return { ...node, - activated: match, selected: match, size: match && viewPoint === graphLabels.showGraphView ? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 
50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships const updatedRelationships = relationships.map((rel) => { return { ...rel, - activated: false, selected: false, }; }); @@ -282,6 +274,26 @@ const GraphViewModal: React.FunctionComponent = ({ const checkBoxView = viewPoint !== graphLabels.chatInfoView; + // the checkbox selection + const handleCheckboxChange = (graph: GraphType) => { + const currentIndex = graphType.indexOf(graph); + const newGraphSelected = [...graphType]; + if (currentIndex === -1) { + newGraphSelected.push(graph); + initGraph(newGraphSelected, allNodes, allRelationships, scheme); + } else { + newGraphSelected.splice(currentIndex, 1); + initGraph(newGraphSelected, allNodes, allRelationships, scheme); + } + setSearchQuery(''); + setGraphType(newGraphSelected); + setSelected(undefined); + if (nvlRef.current && nvlRef?.current?.getScale() > 1) { + handleZoomToFit(); + } + }; + + //Callback const nvlCallbacks = { onLayoutComputing(isComputing: boolean) { if (!isComputing) { @@ -316,96 +328,31 @@ const GraphViewModal: React.FunctionComponent = ({ setStatusMessage(''); setGraphViewOpen(false); setScheme({}); - setGraphType(intitalGraphType(isGdsActive)); + setGraphType([]); setNodes([]); setRelationships([]); setAllNodes([]); setAllRelationships([]); setSearchQuery(''); + setSelected(undefined); }; - // sort the legends in with Chunk and Document always the first two values - const nodeCheck = Object.keys(newScheme).sort((a, b) => { - if (a === graphLabels.document || a === graphLabels.chunk) { - return -1; - } else if (b === graphLabels.document || b === graphLabels.chunk) { - return 1; - } - return a.localeCompare(b); - }); - // get sorted relationships - const relationshipsSorted = relationships.sort(sortAlphabetically); - - // To get the relationship count - const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( - relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: 
Relationship) => { - const key = relType.caption || ''; - if (!acc[key]) { - acc[key] = { ...relType, count: 0 }; - } - - acc[key]!.count += relationshipCount(relationships as ExtendedRelationship[], key); - return acc; - }, {}) - ); - - // On Node Click, highlighting the nodes and deactivating any active relationships - const handleNodeClick = (nodeLabel: string) => { - const updatedNodes = nodes.map((node) => { - const isActive = node.labels.includes(nodeLabel); - return { - ...node, - activated: isActive, - selected: isActive, - size: - isActive && viewPoint === graphLabels.showGraphView - ? 100 - : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, - }; - }); - // deactivating any active relationships - const updatedRelationships = relationships.map((rel) => { - return { - ...rel, - activated: false, - selected: false, - }; - }); - if (searchQuery !== '') { - setSearchQuery(''); - } - setNodes(updatedNodes); - setRelationships(updatedRelationships); - }; - // On Relationship Legend Click, highlight the relationships and deactivating any active nodes - const handleRelationshipClick = (nodeLabel: string) => { - const updatedRelations = relationships.map((rel) => { - return { - ...rel, - activated: rel?.caption?.includes(nodeLabel), - selected: rel?.caption?.includes(nodeLabel), - }; - }); - // // deactivating any active nodes - const updatedNodes = nodes.map((node) => { - return { - ...node, - activated: false, - selected: false, - size: graphLabels.nodeSize, - }; - }); - if (searchQuery !== '') { - setSearchQuery(''); - } - setRelationships(updatedRelations); - setNodes(updatedNodes); + const mouseEventCallbacks = { + onNodeClick: (clickedNode: Node) => { + setSelected({ type: 'node', id: clickedNode.id }); + }, + onRelationshipClick: (clickedRelationship: Relationship) => { + setSelected({ type: 'relationship', id: clickedRelationship.id }); + }, + onCanvasClick: () => { + setSelected(undefined); + }, + onPan: true, + onZoom: 
true, + onDrag: true, }; - // const isCommunity = allNodes.some(n=>n.labels.includes('__Community__')); return ( <> = ({
    - }} - handleClasses={{ left: 'ml-1' }} - > -
    - {nodeCheck.length > 0 && ( - <> - - {graphLabels.resultOverview} -
    - { - setSearchQuery(e.target.value); - }} - placeholder='Search On Node Properties' - fluid={true} - leftIcon={ - - - - } - /> -
    - - {graphLabels.totalNodes} ({nodes.length}) - -
    -
    - - {nodeCheck.map((nodeLabel, index) => ( - handleNodeClick(nodeLabel)} - /> - ))} - -
    - - )} - {relationshipsSorted.length > 0 && ( - <> - - - {graphLabels.totalRelationships} ({relationships.length}) - - -
    - - {groupedAndSortedRelationships.map((relType, index) => ( - handleRelationshipClick(relType.caption || '')} - /> - ))} - -
    - - )} -
    -
    + + {selectedItem !== undefined ? ( + + ) : ( + )} +
    )} diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx index 832150fad..1196d8554 100644 --- a/frontend/src/components/Graph/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -6,7 +6,7 @@ export const LegendsChip: React.FunctionComponent = ({ scheme, return ( { + if (!open) { + return null; + } + return ( + }} + handleClasses={{ left: 'ml-1' }} + onResizeStop={onResizeStop} + > +
    + {children} +
    +
    + ); +} +const Title = ({ children }: { children: React.ReactNode }) => { + return ( +
    + {children} +
    + ); +} +ResizePanelDetails.Title = Title; + +const Content = ({ children }: { children: React.ReactNode }) => { + return
    {children}
    ; +} +ResizePanelDetails.Content = Content; \ No newline at end of file diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx new file mode 100644 index 000000000..1f755d188 --- /dev/null +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -0,0 +1,197 @@ + +import { Flex, IconButton, TextInput, Typography } from '@neo4j-ndl/react'; +import { MagnifyingGlassIconOutline } from "@neo4j-ndl/react/icons"; +import { LegendsChip } from './LegendsChip'; +import { + ExtendedNode, + ExtendedRelationship, + Scheme, +} from '../../types'; +import { + graphLabels, + RESULT_STEP_SIZE, +} from '../../utils/Constants'; +import type { Relationship } from '@neo4j-nvl/base'; +import { ShowAll } from '../UI/ShowAll'; +import { sortAlphabetically } from "../../utils/Utils"; +import { Dispatch, SetStateAction } from "react"; + +interface OverViewProps { + nodes: ExtendedNode[]; + relationships: ExtendedRelationship[]; + newScheme: Scheme; + searchQuery: string + setSearchQuery: Dispatch> + viewPoint: string, + setNodes: Dispatch>, + setRelationships: Dispatch> +} +const ResultOverview: React.FunctionComponent = ({ nodes, relationships, newScheme, searchQuery, setSearchQuery, viewPoint, setNodes, setRelationships }) => { + const nodeCount = (nodes: ExtendedNode[], label: string): number => { + return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; + }; + + // sort the legends in with Chunk and Document always the first two values + const nodeCheck = Object.keys(newScheme).sort((a, b) => { + if (a === graphLabels.document || a === graphLabels.chunk) { + return -1; + } else if (b === graphLabels.document || b === graphLabels.chunk) { + return 1; + } + return a.localeCompare(b); + }); + + // get sorted relationships + const relationshipsSorted = relationships.sort(sortAlphabetically); + + + const relationshipCount = (relationships: ExtendedRelationship[], label: string): number => { + 
return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => i.id))].length; + }; + + // To get the relationship count + const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( + relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: Relationship) => { + const key = relType.caption || ''; + if (!acc[key]) { + acc[key] = { ...relType, count: 0 }; + } + (acc[key] as { count: number }).count += relationshipCount(relationships as ExtendedRelationship[], key); + return acc; + }, {}) + ); + + // On Relationship Legend Click, highlight the relationships and deactivating any active nodes + const handleRelationshipClick = (nodeLabel: string) => { + const updatedRelations = relationships.map((rel) => { + return { + ...rel, + selected: rel?.caption?.includes(nodeLabel), + }; + }); + + // // deactivating any active nodes + const updatedNodes = nodes.map((node) => { + return { + ...node, + selected: false, + size: graphLabels.nodeSize, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setRelationships(updatedRelations); + setNodes(updatedNodes); + }; + + + + // On Node Click, highlighting the nodes and deactivating any active relationships + const handleNodeClick = (nodeLabel: string) => { + const updatedNodes = nodes.map((node) => { + const isActive = node.labels.includes(nodeLabel); + return { + ...node, + selected: isActive, + size: + isActive && viewPoint === graphLabels.showGraphView + ? 100 + : isActive && viewPoint !== graphLabels.showGraphView + ? 50 + : graphLabels.nodeSize, + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + selected: false, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }; + + return ( + <> + {nodeCheck.length > 0 && ( + <> + + {graphLabels.resultOverview} +
    + { + setSearchQuery(e.target.value); + }} + placeholder='Search On Node Properties' + fluid={true} + leftIcon={ + + + + } + /> +
    + + {graphLabels.totalNodes} ({nodes.length}) + +
    +
    + + {nodeCheck.map((nodeLabel, index) => ( + handleNodeClick(nodeLabel)} + /> + ))} + +
    + + )} + {relationshipsSorted.length > 0 && ( + <> + + + {graphLabels.totalRelationships} ({relationships.length}) + + +
    + + {groupedAndSortedRelationships.map((relType, index) => ( + handleRelationshipClick(relType.caption || '')} + scheme={{}} /> + ))} + +
    + + )} + + ) +} + +export default ResultOverview; \ No newline at end of file diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 283227bcb..392754d77 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -244,7 +244,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes extends Omit {} +export interface WikipediaModalTypes extends Omit { } export interface GraphViewModalProps { open: boolean; @@ -266,7 +266,7 @@ export interface CheckboxSectionProps { graphType: GraphType[]; loading: boolean; handleChange: (graph: GraphType) => void; - isgds: boolean; + isCommunity: boolean; isDocChunk: boolean; isEntity: boolean; } @@ -347,8 +347,8 @@ export interface LegendChipProps { scheme: Scheme; label: string; type: 'node' | 'relationship' | 'propertyKey'; - count: number; - onClick: (e: React.MouseEvent) => void; + count?: number; + onClick?: (e: React.MouseEvent) => void; } export interface FileContextProviderProps { children: ReactNode; @@ -430,6 +430,10 @@ export interface SourceListServerData { message?: string; } +export interface MetricsState extends metricdetails { + error?: string; +} + export interface chatInfoMessage extends Partial { sources: string[]; model: string; @@ -590,13 +594,6 @@ export interface Origin { horizontal: Horizontal; } -export type BasicNode = { - id: string; - labels: string[]; - properties: Record; - propertyTypes: Record; -}; - export type GraphStatsLabels = Record< string, { @@ -614,7 +611,7 @@ export interface ExtendedNode extends Node { } export interface ExtendedRelationship extends Relationship { - count: number; + count?: number; } export interface connectionState { openPopUp: boolean; @@ -775,6 +772,34 @@ export interface FileContextType { setPostProcessingVal: Dispatch>; } export declare type Side = 'top' | 'right' | 'bottom' | 'left'; -export interface MetricsState extends metricdetails { - error?: string; -} + +export type 
EntityType = 'node' | 'relationship'; + +export type BasicRelationship = { + id: string; + to: string; + from: string; + type: string; + caption: string; +}; + +export type BasicNode = { + id: string; + type: string; + labels: string[]; + properties: Record; + propertyTypes: Record; +}; + + +export type GraphPropertiesTableProps = { + propertiesWithTypes: { + key: string; + value: string; + }[]; +}; + +export type GraphPropertiesPanelProps = { + inspectedItem: BasicNode | BasicRelationship; + newScheme: Scheme; +}; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 47a19e701..34e1b7263 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -3,34 +3,6 @@ import { GraphType, OptionType } from '../types'; import { getDateTime, getDescriptionForChatMode } from './Utils'; import chatbotmessages from '../assets/ChatbotMessages.json'; -export const document = `+ [docs]`; - -export const chunks = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain -+ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks`; - -export const entities = `+ collect { OPTIONAL MATCH (c:Chunk)-[:HAS_ENTITY]->(e), p=(e)-[*0..1]-(:!Chunk) RETURN p}`; - -export const docEntities = `+ [docs] -+ collect { MATCH (c:Chunk)-[:HAS_ENTITY]->(e), p=(e)--(:!Chunk) RETURN p }`; - -export const docChunks = `+[chunks] -+collect {MATCH p=(c)-[:FIRST_CHUNK]-() RETURN p} //first chunk -+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain -+ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunk`; - -export const chunksEntities = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain - -+ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks -//chunks with entities -+ collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; - -export const docChunkEntities = `+[chunks] -+collect {MATCH p=(c)-[:FIRST_CHUNK]-() RETURN p} //first chunk -+ collect { 
MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain -+ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks -//chunks with entities -+ collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; - export const APP_SOURCES = process.env.VITE_REACT_APP_SOURCES !== '' ? (process.env.VITE_REACT_APP_SOURCES?.split(',') as string[]) @@ -40,26 +12,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai_gpt_3.5', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai-gpt-3.5', + 'openai-gpt-4o', + 'openai-gpt-4o-mini', + 'gemini-1.5-pro', + 'gemini-1.5-flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai-gpt-4o') ? 'openai-gpt-4o' : llms?.includes('gemini-1.5-pro') - ? 'gemini-1.5-pro' - : 'diffbot'; + ? 'gemini-1.5-pro' + : 'diffbot'; export const chatModeLables = { vector: 'vector', @@ -75,40 +47,40 @@ export const chatModeLables = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? 
process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables.graph_vector, - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables.graph_vector_fulltext, - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables.entity_vector, - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables.global_vector, - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables.graph_vector, + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables.graph_vector_fulltext, + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: 
chatModeLables.entity_vector, + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables.global_vector, + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -116,53 +88,6 @@ export const timePerByte = 0.2; export const largeFileSize = process.env.VITE_LARGE_FILE_SIZE ? parseInt(process.env.VITE_LARGE_FILE_SIZE) : 5 * 1024 * 1024; -export const NODES_OPTIONS = [ - { - label: 'Person', - value: 'Person', - }, - { - label: 'Organization', - value: 'Organization', - }, - { - label: 'Event', - value: 'Event', - }, -]; - -export const RELATION_OPTIONS = [ - { - label: 'WORKS_AT', - value: 'WORKS_AT', - }, - { - label: 'IS_CEO', - value: 'IS_CEO', - }, - { - label: 'HOSTS_EVENT', - value: 'HOSTS_EVENT', - }, -]; - -export const queryMap: { - Document: string; - Chunks: string; - Entities: string; - DocEntities: string; - DocChunks: string; - ChunksEntities: string; - DocChunkEntities: string; -} = { - Document: 'document', - Chunks: 'chunks', - Entities: 'entities', - DocEntities: 'docEntities', - DocChunks: 'docChunks', - ChunksEntities: 'chunksEntities', - DocChunkEntities: 'docChunkEntities', -}; export const tooltips = { generateGraph: 'Generate graph from selected files', @@ -246,6 +171,35 @@ export const RETRY_OPIONS = [ ]; export const batchSize: number = parseInt(process.env.VITE_BATCH_SIZE ?? 
'2'); +//Graph Constants +export const document = `+ [docs]`; + +export const chunks = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain ++ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks`; + +export const entities = `+ collect { OPTIONAL MATCH (c:Chunk)-[:HAS_ENTITY]->(e), p=(e)-[*0..1]-(:!Chunk) RETURN p}`; + +export const docEntities = `+ [docs] ++ collect { MATCH (c:Chunk)-[:HAS_ENTITY]->(e), p=(e)--(:!Chunk) RETURN p }`; + +export const docChunks = `+[chunks] ++collect {MATCH p=(c)-[:FIRST_CHUNK]-() RETURN p} //first chunk ++ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain ++ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunk`; + +export const chunksEntities = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain + ++ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks +//chunks with entities ++ collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; + +export const docChunkEntities = `+[chunks] ++collect {MATCH p=(c)-[:FIRST_CHUNK]-() RETURN p} //first chunk ++ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain ++ collect { MATCH p=(c)-[:SIMILAR]-() RETURN p } // similar-chunks +//chunks with entities ++ collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; + export const nvlOptions: NvlOptions = { allowDynamicMinZoom: true, disableWebGL: true, @@ -257,10 +211,22 @@ export const nvlOptions: NvlOptions = { initialZoom: 1, }; -export const mouseEventCallbacks = { - onPan: true, - onZoom: true, - onDrag: true, +export const queryMap: { + Document: string; + Chunks: string; + Entities: string; + DocEntities: string; + DocChunks: string; + ChunksEntities: string; + DocChunkEntities: string; +} = { + Document: 'document', + Chunks: 'chunks', + Entities: 'entities', + DocEntities: 'docEntities', + DocChunks: 'docChunks', + ChunksEntities: 'chunksEntities', + DocChunkEntities: 'docChunkEntities', }; 
// export const graphQuery: string = queryMap.DocChunkEntities; @@ -276,11 +242,6 @@ export const intitalGraphType = (isGDSActive: boolean): GraphType[] => { : ['DocumentChunk', 'Entities']; // GDS is inactive, exclude communities }; -export const appLabels = { - ownSchema: 'Or Define your own Schema', - predefinedSchema: 'Select a Pre-defined Schema', -}; - export const graphLabels = { showGraphView: 'showGraphView', chatInfoView: 'chatInfoView', @@ -310,6 +271,12 @@ export const connectionLabels = { greenStroke: 'green', redStroke: 'red', }; + export const getDefaultMessage = () => { return [{ ...chatbotmessages.listMessages[0], datetime: getDateTime() }]; }; + +export const appLabels = { + ownSchema: 'Or Define your own Schema', + predefinedSchema: 'Select a Pre-defined Schema', +}; \ No newline at end of file diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 1d8f3be07..ae41a19fb 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -21,6 +21,7 @@ import s3logo from '../assets/images/s3logo.png'; import gcslogo from '../assets/images/gcs.webp'; import { chatModeLables } from './Constants'; + // Get the Url export const url = () => { let url = window.location.href.replace('5173', '8000'); @@ -205,7 +206,6 @@ export const filterData = ( allNodes: ExtendedNode[], allRelationships: Relationship[], scheme: Scheme, - isGdsActive: boolean ) => { let filteredNodes: ExtendedNode[] = []; let filteredRelations: Relationship[] = []; @@ -216,9 +216,7 @@ export const filterData = ( // Only Document + Chunk // const processedEntities = entityTypes.flatMap(item => item.includes(',') ? 
item.split(',') : item); if ( - graphType.includes('DocumentChunk') && - !graphType.includes('Entities') && - (!graphType.includes('Communities') || !isGdsActive) + graphType.includes('DocumentChunk') && !graphType.includes('Entities') && !graphType.includes('Communities') ) { filteredNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') @@ -232,12 +230,8 @@ export const filterData = ( ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk }; // Only Entity - } else if ( - graphType.includes('Entities') && - !graphType.includes('DocumentChunk') && - (!graphType.includes('Communities') || !isGdsActive) - ) { - const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); + } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk') && !graphType.includes('Communities')) { + const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__')); filteredNodes = entityNodes ? 
entityNodes : []; const nodeIds = new Set(filteredNodes.map((node) => node.id)); filteredRelations = allRelationships.filter( @@ -251,8 +245,7 @@ export const filterData = ( } else if ( graphType.includes('Communities') && !graphType.includes('DocumentChunk') && - !graphType.includes('Entities') && - isGdsActive + !graphType.includes('Entities') ) { filteredNodes = allNodes.filter((node) => node.labels.includes('__Community__')); const nodeIds = new Set(filteredNodes.map((node) => node.id)); @@ -265,7 +258,7 @@ export const filterData = ( } else if ( graphType.includes('DocumentChunk') && graphType.includes('Entities') && - (!graphType.includes('Communities') || !isGdsActive) + (!graphType.includes('Communities')) ) { filteredNodes = allNodes.filter( (node) => @@ -289,8 +282,7 @@ export const filterData = ( } else if ( graphType.includes('Entities') && graphType.includes('Communities') && - !graphType.includes('DocumentChunk') && - isGdsActive + !graphType.includes('DocumentChunk') ) { const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')); const communityNodes = allNodes.filter((node) => node.labels.includes('__Community__')); @@ -310,8 +302,7 @@ export const filterData = ( } else if ( graphType.includes('DocumentChunk') && graphType.includes('Communities') && - !graphType.includes('Entities') && - isGdsActive + !graphType.includes('Entities') ) { const documentChunkNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') @@ -332,8 +323,7 @@ export const filterData = ( } else if ( graphType.includes('DocumentChunk') && graphType.includes('Entities') && - graphType.includes('Communities') && - isGdsActive + graphType.includes('Communities') ) { filteredNodes = allNodes; filteredRelations = allRelationships; @@ -475,8 +465,25 @@ export function isAllowedHost(url: string, allowedHosts: string[]) { } export const getCheckboxConditions = 
(allNodes: ExtendedNode[]) => { - const isDocChunk = allNodes.some((n) => n.labels?.includes('Document')); - const isEntity = allNodes.some((n) => !n.labels?.includes('Document') || !n.labels?.includes('Chunk')); - const isgds = allNodes.some((n) => n.labels?.includes('__Community__')); - return { isDocChunk, isEntity, isgds }; + const isDocChunk = allNodes.some((n) => n.labels?.includes('Document') || n.labels?.includes('Chunk')); + const isEntity = allNodes.some((n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__')); + const isCommunity = allNodes.some((n) => n.labels?.includes('__Community__')); + return { isDocChunk, isEntity, isCommunity }; }; + +export const graphTypeFromNodes = (allNodes:ExtendedNode[])=>{ + const graphType: GraphType[] =[]; + const hasDocChunk = allNodes.some((n) => n.labels?.includes('Document') || n.labels?.includes('Chunk')); + const hasEntity = allNodes.some((n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__')); + const hasCommunity = allNodes.some((n) => n.labels?.includes('__Community__')); + if(hasDocChunk){ + graphType.push('DocumentChunk'); + } + if(hasEntity){ + graphType.push('Entities'); + } + if(hasCommunity){ + graphType.push('Communities'); + } + return graphType; +} \ No newline at end of file From ba95afd9e2fe4b5f51a726c2fbb874cfc1f084fd Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 10 Oct 2024 12:49:14 +0000 Subject: [PATCH 162/292] format changes --- frontend/src/components/ChatBot/Chatbot.tsx | 459 +++++++++--------- .../components/Graph/GraphPropertiesPanel.tsx | 143 +++--- .../components/Graph/GraphPropertiesTable.tsx | 59 ++- .../src/components/Graph/GraphViewModal.tsx | 65 ++- frontend/src/components/Graph/ResizePanel.tsx | 86 ++-- .../src/components/Graph/ResultOverview.tsx | 348 +++++++------ .../Deduplication/index.tsx | 4 +- 
.../SelectedJobList.tsx | 4 +- frontend/src/types.ts | 3 +- frontend/src/utils/Constants.ts | 102 ++-- frontend/src/utils/Utils.ts | 42 +- 11 files changed, 646 insertions(+), 669 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 7a8b64a73..9e3ba07f6 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -245,7 +245,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -260,7 +260,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -269,15 +269,15 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { - ...msg, - modes: { - ...msg.modes, - [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, - }, - } - : msg) + ...msg, + modes: { + ...msg.modes, + [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, + }, + } + : msg ) ); } @@ -290,19 +290,19 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? 
{ - ...msg, - isLoading: false, - isTyping: false, - modes: { - [chatModes[0]]: { - message: 'An error occurred while processing your request.', - error: error.message, - }, + ...msg, + isLoading: false, + isTyping: false, + modes: { + [chatModes[0]]: { + message: 'An error occurred while processing your request.', + error: error.message, }, - } - : msg) + }, + } + : msg ) ); } @@ -409,78 +409,100 @@ const Chatbot: FC = (props) => { } }, []); - return ( -
    -
    - -
    - {listMessages.map((chat, index) => { - const messagechatModes = Object.keys(chat.modes); - return ( -
    -
    - {chat.user === 'chatbot' ? ( - - ) : ( - - )} -
    - +
    + +
    + {listMessages.map((chat, index) => { + const messagechatModes = Object.keys(chat.modes); + return ( +
    +
    + {chat.user === 'chatbot' ? ( + + ) : ( + + )} +
    + -
    +
    - - {chat.modes[chat.currentMode]?.message || ''} - -
    + > + + {chat.modes[chat.currentMode]?.message || ''} + +
    +
    -
    - - {chat.datetime} - -
    - {chat.user === 'chatbot' && - chat.id !== 2 && - !chat.isLoading && - !chat.isTyping && - (!isFullScreen ? ( - 1 ? 'space-between' : 'unset'} - alignItems='center' - > + + {chat.datetime} + +
    + {chat.user === 'chatbot' && + chat.id !== 2 && + !chat.isLoading && + !chat.isTyping && + (!isFullScreen ? ( + 1 ? 'space-between' : 'unset'} + alignItems='center' + > + + {messagechatModes.length > 1 && ( + { + const modes = Object.keys(chat.modes); + const modeswtich = modes[index]; + handleSwitchMode(chat.id, modeswtich); + }} + isFullScreen={false} + currentModeIndex={messagechatModes.indexOf(chat.currentMode)} + modescount={messagechatModes.length} + /> + )} + + ) : ( + + = (props) => { listMessages={listMessages} speechHandler={speechHandler} > + + {messagechatModes.length > 1 && ( = (props) => { const modeswtich = modes[index]; handleSwitchMode(chat.id, modeswtich); }} - isFullScreen={false} + isFullScreen={isFullScreen} currentModeIndex={messagechatModes.indexOf(chat.currentMode)} modescount={messagechatModes.length} /> )} - - ) : ( - - - - - - {messagechatModes.length > 1 && ( - { - const modes = Object.keys(chat.modes); - const modeswtich = modes[index]; - handleSwitchMode(chat.id, modeswtich); - }} - isFullScreen={isFullScreen} - currentModeIndex={messagechatModes.indexOf(chat.currentMode)} - modescount={messagechatModes.length} - /> - )} - - - ))} -
    -
    -
    - ); - })} -
    -
    -
    -
    -
    - + + ))} +
    +
    +
    + ); + })} +
    +
    +
    +
    + + - - {buttonCaptions.ask}{' '} - {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`} - - -
    - }> - setShowInfoModal(false)} - open={showInfoModal} + aria-label='chatbot-input' + type='text' + value={inputMessage} + fluid + onChange={handleInputChange} + name='chatbot-input' + /> + -
    - { - downloadClickHandler({ - chatResponse: activeChat, - chunks, - metricDetails, - communities, - responseTime, - entities: infoEntities, - nodes, - tokensUsed, - model, - }); - }} - > - - - "" - - - setShowInfoModal(false)} - > - - -
    - -
    -
    + {buttonCaptions.ask}{' '} + {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`} + +
    - ); + }> + setShowInfoModal(false)} + open={showInfoModal} + > +
    + { + downloadClickHandler({ + chatResponse: activeChat, + chunks, + metricDetails, + communities, + responseTime, + entities: infoEntities, + nodes, + tokensUsed, + model, + }); + }} + > + + + "" + + + setShowInfoModal(false)} + > + + +
    + +
    +
    +
    +); }; export default Chatbot; diff --git a/frontend/src/components/Graph/GraphPropertiesPanel.tsx b/frontend/src/components/Graph/GraphPropertiesPanel.tsx index 276160b69..64332c9ac 100644 --- a/frontend/src/components/Graph/GraphPropertiesPanel.tsx +++ b/frontend/src/components/Graph/GraphPropertiesPanel.tsx @@ -4,88 +4,81 @@ import { BasicNode, BasicRelationship, GraphPropertiesPanelProps } from '../../t import { LegendsChip } from './LegendsChip'; import GraphPropertiesTable from './GraphPropertiesTable'; - const sortAlphabetically = (a: string, b: string) => a.toLowerCase().localeCompare(b.toLowerCase()); const isNode = (item: BasicNode | BasicRelationship): item is BasicNode => { - return 'labels' in item && !('from' in item) && !('to' in item); + return 'labels' in item && !('from' in item) && !('to' in item); }; const GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanelProps) => { - const inspectedItemType = isNode(inspectedItem) ? 'node' : 'relationship'; - const properties = inspectedItemType === 'node' - ? [ - { - key: '', - value: `${(inspectedItem as BasicNode).id}`, - type: 'String', - }, - ...Object.keys((inspectedItem as BasicNode).properties).map((key) => { - const value = (inspectedItem as BasicNode).properties[key]; - return { key: key, value: value ?? '' }; - }), + const inspectedItemType = isNode(inspectedItem) ? 'node' : 'relationship'; + const properties = + inspectedItemType === 'node' + ? [ + { + key: '', + value: `${(inspectedItem as BasicNode).id}`, + type: 'String', + }, + ...Object.keys((inspectedItem as BasicNode).properties).map((key) => { + const value = (inspectedItem as BasicNode).properties[key]; + return { key: key, value: value ?? 
'' }; + }), ] - : [ - { - key: '', - value: `${(inspectedItem as BasicRelationship).id}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).from}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).to}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).caption ?? ''}`, - type: 'String', - }, + : [ + { + key: '', + value: `${(inspectedItem as BasicRelationship).id}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).from}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).to}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).caption ?? ''}`, + type: 'String', + }, ]; - const labelsSorted = useMemo(() => { - if (isNode(inspectedItem)) { - return [...inspectedItem.labels].sort(sortAlphabetically); - } - return []; - }, [inspectedItem]); + const labelsSorted = useMemo(() => { + if (isNode(inspectedItem)) { + return [...inspectedItem.labels].sort(sortAlphabetically); + } + return []; + }, [inspectedItem]); - return ( - <> - -
    - {inspectedItemType === 'node' ? 'Node details' : 'Relationship details'} -
    -
    - -
    - {isNode(inspectedItem) ? ( - labelsSorted.map((label) => ( - - )) - ) : ( - - )} -
    -
    - - - - ); -} + return ( + <> + +
    {inspectedItemType === 'node' ? 'Node details' : 'Relationship details'}
    +
    + +
    + {isNode(inspectedItem) ? ( + labelsSorted.map((label) => ( + + )) + ) : ( + + )} +
    +
    + + + + ); +}; -export default GraphPropertiesPanel; \ No newline at end of file +export default GraphPropertiesPanel; diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index 8cd7571c7..7600ccd35 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -2,38 +2,33 @@ import { GraphLabel, Typography } from '@neo4j-ndl/react'; import { GraphPropertiesTableProps } from '../../types'; const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps): JSX.Element => { - console.log('props', propertiesWithTypes); - return ( -
    -
    - - Key - - Value + console.log('props', propertiesWithTypes); + return ( +
    +
    + + Key + + Value +
    + {propertiesWithTypes.map(({ key, value }, _) => { + return ( +
    +
    + + {key} +
    - {propertiesWithTypes.map(({ key, value }, _) => { - return ( -
    -
    - - {key} - -
    -
    - {value} -
    -
    - ); - })} -
    - ); +
    {value}
    +
    + ); + })} +
    + ); }; -export default GraphPropertiesTable; \ No newline at end of file +export default GraphPropertiesTable; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index cb995a014..dddfb4175 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,11 +1,4 @@ -import { - Banner, - Dialog, - Flex, - IconButtonArray, - LoadingSpinner, - useDebounce, -} from '@neo4j-ndl/react'; +import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, useDebounce } from '@neo4j-ndl/react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { BasicNode, @@ -32,19 +25,13 @@ import { filterData, getCheckboxConditions, graphTypeFromNodes, processGraphData import { useCredentials } from '../../context/UserCredentials'; import graphQueryAPI from '../../services/GraphQuery'; -import { - graphLabels, - intitalGraphType, - nvlOptions, - queryMap, -} from '../../utils/Constants'; +import { graphLabels, nvlOptions, queryMap } from '../../utils/Constants'; import CheckboxSelection from './CheckboxSelection'; import ResultOverview from './ResultOverview'; import { ResizePanelDetails } from './ResizePanel'; import GraphPropertiesPanel from './GraphPropertiesPanel'; - const GraphViewModal: React.FunctionComponent = ({ open, inspectedName, @@ -75,10 +62,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? 
queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -87,7 +74,7 @@ const GraphViewModal: React.FunctionComponent = ({ {} ); }; -console.log('graphType', graphType); + console.log('graphType', graphType); // Unmounting the component useEffect(() => { const timeoutId = setTimeout(() => { @@ -114,17 +101,17 @@ console.log('graphType', graphType); if (Array.isArray(updateGraphType)) { setGraphType(updateGraphType); } - }, [allNodes]) + }, [allNodes]); const fetchData = useCallback(async () => { try { const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -204,7 +191,7 @@ console.log('graphType', graphType); graphType, finalNodes ?? [], finalRels ?? [], - schemeVal, + schemeVal ); setNodes(filteredNodes); setRelationships(filteredRelations); @@ -212,7 +199,6 @@ console.log('graphType', graphType); } }; - const selectedItem = useMemo(() => { if (selected === undefined) { return undefined; @@ -245,8 +231,8 @@ console.log('graphType', graphType); match && viewPoint === graphLabels.showGraphView ? 100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 
50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -293,7 +279,7 @@ console.log('graphType', graphType); } }; - //Callback + // Callback const nvlCallbacks = { onLayoutComputing(isComputing: boolean) { if (!isComputing) { @@ -337,7 +323,6 @@ console.log('graphType', graphType); setSelected(undefined); }; - const mouseEventCallbacks = { onNodeClick: (clickedNode: Node) => { setSelected({ type: 'node', id: clickedNode.id }); @@ -442,10 +427,22 @@ console.log('graphType', graphType);
    {selectedItem !== undefined ? ( - + ) : ( - )} + + )}
    diff --git a/frontend/src/components/Graph/ResizePanel.tsx b/frontend/src/components/Graph/ResizePanel.tsx index 049b782f1..a0eac1a7e 100644 --- a/frontend/src/components/Graph/ResizePanel.tsx +++ b/frontend/src/components/Graph/ResizePanel.tsx @@ -3,54 +3,52 @@ import type { ResizeCallback } from 're-resizable'; import { Resizable } from 're-resizable'; type ResizePanelProps = { - children: React.ReactNode; - open: boolean; - onResizeStop?: ResizeCallback; - defaultWidth?: number; + children: React.ReactNode; + open: boolean; + onResizeStop?: ResizeCallback; + defaultWidth?: number; }; export const ResizePanelDetails = ({ children, open, onResizeStop }: ResizePanelProps) => { - if (!open) { - return null; - } - return ( - }} - handleClasses={{ left: 'ml-1' }} - onResizeStop={onResizeStop} - > -
    - {children} -
    -
    - ); -} + if (!open) { + return null; + } + return ( + }} + handleClasses={{ left: 'ml-1' }} + onResizeStop={onResizeStop} + > +
    {children}
    +
    + ); +}; const Title = ({ children }: { children: React.ReactNode }) => { - return ( -
    - {children} -
    - ); -} + return ( +
    + {children} +
    + ); +}; ResizePanelDetails.Title = Title; const Content = ({ children }: { children: React.ReactNode }) => { - return
    {children}
    ; -} -ResizePanelDetails.Content = Content; \ No newline at end of file + return
    {children}
    ; +}; +ResizePanelDetails.Content = Content; diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx index 1f755d188..93c6b54e3 100644 --- a/frontend/src/components/Graph/ResultOverview.tsx +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -1,197 +1,187 @@ - import { Flex, IconButton, TextInput, Typography } from '@neo4j-ndl/react'; -import { MagnifyingGlassIconOutline } from "@neo4j-ndl/react/icons"; +import { MagnifyingGlassIconOutline } from '@neo4j-ndl/react/icons'; import { LegendsChip } from './LegendsChip'; -import { - ExtendedNode, - ExtendedRelationship, - Scheme, -} from '../../types'; -import { - graphLabels, - RESULT_STEP_SIZE, -} from '../../utils/Constants'; +import { ExtendedNode, ExtendedRelationship, Scheme } from '../../types'; +import { graphLabels, RESULT_STEP_SIZE } from '../../utils/Constants'; import type { Relationship } from '@neo4j-nvl/base'; import { ShowAll } from '../UI/ShowAll'; -import { sortAlphabetically } from "../../utils/Utils"; -import { Dispatch, SetStateAction } from "react"; +import { sortAlphabetically } from '../../utils/Utils'; +import { Dispatch, SetStateAction } from 'react'; interface OverViewProps { - nodes: ExtendedNode[]; - relationships: ExtendedRelationship[]; - newScheme: Scheme; - searchQuery: string - setSearchQuery: Dispatch> - viewPoint: string, - setNodes: Dispatch>, - setRelationships: Dispatch> + nodes: ExtendedNode[]; + relationships: ExtendedRelationship[]; + newScheme: Scheme; + searchQuery: string; + setSearchQuery: Dispatch>; + viewPoint: string; + setNodes: Dispatch>; + setRelationships: Dispatch>; } -const ResultOverview: React.FunctionComponent = ({ nodes, relationships, newScheme, searchQuery, setSearchQuery, viewPoint, setNodes, setRelationships }) => { - const nodeCount = (nodes: ExtendedNode[], label: string): number => { - return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; - }; - - // 
sort the legends in with Chunk and Document always the first two values - const nodeCheck = Object.keys(newScheme).sort((a, b) => { - if (a === graphLabels.document || a === graphLabels.chunk) { - return -1; - } else if (b === graphLabels.document || b === graphLabels.chunk) { - return 1; - } - return a.localeCompare(b); - }); +const ResultOverview: React.FunctionComponent = ({ + nodes, + relationships, + newScheme, + searchQuery, + setSearchQuery, + viewPoint, + setNodes, + setRelationships, +}) => { + const nodeCount = (nodes: ExtendedNode[], label: string): number => { + return [...new Set(nodes?.filter((n) => n.labels?.includes(label)).map((i) => i.id))].length; + }; - // get sorted relationships - const relationshipsSorted = relationships.sort(sortAlphabetically); + // sort the legends in with Chunk and Document always the first two values + const nodeCheck = Object.keys(newScheme).sort((a, b) => { + if (a === graphLabels.document || a === graphLabels.chunk) { + return -1; + } else if (b === graphLabels.document || b === graphLabels.chunk) { + return 1; + } + return a.localeCompare(b); + }); + // get sorted relationships + const relationshipsSorted = relationships.sort(sortAlphabetically); - const relationshipCount = (relationships: ExtendedRelationship[], label: string): number => { - return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => i.id))].length; - }; + const relationshipCount = (relationships: ExtendedRelationship[], label: string): number => { + return [...new Set(relationships?.filter((r) => r.caption?.includes(label)).map((i) => i.id))].length; + }; - // To get the relationship count - const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( - relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: Relationship) => { - const key = relType.caption || ''; - if (!acc[key]) { - acc[key] = { ...relType, count: 0 }; - } - (acc[key] as { count: number }).count += 
relationshipCount(relationships as ExtendedRelationship[], key); - return acc; - }, {}) - ); - - // On Relationship Legend Click, highlight the relationships and deactivating any active nodes - const handleRelationshipClick = (nodeLabel: string) => { - const updatedRelations = relationships.map((rel) => { - return { - ...rel, - selected: rel?.caption?.includes(nodeLabel), - }; - }); - - // // deactivating any active nodes - const updatedNodes = nodes.map((node) => { - return { - ...node, - selected: false, - size: graphLabels.nodeSize, - }; - }); - if (searchQuery !== '') { - setSearchQuery(''); - } - setRelationships(updatedRelations); - setNodes(updatedNodes); - }; + // To get the relationship count + const groupedAndSortedRelationships: ExtendedRelationship[] = Object.values( + relationshipsSorted.reduce((acc: { [key: string]: ExtendedRelationship }, relType: Relationship) => { + const key = relType.caption || ''; + if (!acc[key]) { + acc[key] = { ...relType, count: 0 }; + } + (acc[key] as { count: number }).count += relationshipCount(relationships as ExtendedRelationship[], key); + return acc; + }, {}) + ); + // On Relationship Legend Click, highlight the relationships and deactivating any active nodes + const handleRelationshipClick = (nodeLabel: string) => { + const updatedRelations = relationships.map((rel) => { + return { + ...rel, + selected: rel?.caption?.includes(nodeLabel), + }; + }); + // // deactivating any active nodes + const updatedNodes = nodes.map((node) => { + return { + ...node, + selected: false, + size: graphLabels.nodeSize, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setRelationships(updatedRelations); + setNodes(updatedNodes); + }; - // On Node Click, highlighting the nodes and deactivating any active relationships - const handleNodeClick = (nodeLabel: string) => { - const updatedNodes = nodes.map((node) => { - const isActive = node.labels.includes(nodeLabel); - return { - ...node, - selected: isActive, - size: - 
isActive && viewPoint === graphLabels.showGraphView - ? 100 - : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, - }; - }); - // deactivating any active relationships - const updatedRelationships = relationships.map((rel) => { - return { - ...rel, - selected: false, - }; - }); - if (searchQuery !== '') { - setSearchQuery(''); - } - setNodes(updatedNodes); - setRelationships(updatedRelationships); - }; + // On Node Click, highlighting the nodes and deactivating any active relationships + const handleNodeClick = (nodeLabel: string) => { + const updatedNodes = nodes.map((node) => { + const isActive = node.labels.includes(nodeLabel); + return { + ...node, + selected: isActive, + size: + isActive && viewPoint === graphLabels.showGraphView + ? 100 + : isActive && viewPoint !== graphLabels.showGraphView + ? 50 + : graphLabels.nodeSize, + }; + }); + // deactivating any active relationships + const updatedRelationships = relationships.map((rel) => { + return { + ...rel, + selected: false, + }; + }); + if (searchQuery !== '') { + setSearchQuery(''); + } + setNodes(updatedNodes); + setRelationships(updatedRelationships); + }; - return ( + return ( + <> + {nodeCheck.length > 0 && ( <> - {nodeCheck.length > 0 && ( - <> - - {graphLabels.resultOverview} -
    - { - setSearchQuery(e.target.value); - }} - placeholder='Search On Node Properties' - fluid={true} - leftIcon={ - - - - } - /> -
    - - {graphLabels.totalNodes} ({nodes.length}) - -
    -
    - - {nodeCheck.map((nodeLabel, index) => ( - handleNodeClick(nodeLabel)} - /> - ))} - -
    - - )} - {relationshipsSorted.length > 0 && ( - <> - - - {graphLabels.totalRelationships} ({relationships.length}) - - -
    - - {groupedAndSortedRelationships.map((relType, index) => ( - handleRelationshipClick(relType.caption || '')} - scheme={{}} /> - ))} - -
    - - )} + + {graphLabels.resultOverview} +
    + { + setSearchQuery(e.target.value); + }} + placeholder='Search On Node Properties' + fluid={true} + leftIcon={ + + + + } + /> +
    + + {graphLabels.totalNodes} ({nodes.length}) + +
    +
    + + {nodeCheck.map((nodeLabel, index) => ( + handleNodeClick(nodeLabel)} + /> + ))} + +
    - ) -} + )} + {relationshipsSorted.length > 0 && ( + <> + + + {graphLabels.totalRelationships} ({relationships.length}) + + +
    + + {groupedAndSortedRelationships.map((relType, index) => ( + handleRelationshipClick(relType.caption || '')} + scheme={{}} + /> + ))} + +
    + + )} + + ); +}; -export default ResultOverview; \ No newline at end of file +export default ResultOverview; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index f5a021e30..36fcff3d1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 1cb3c7310..a7621f2b6 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive + isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + : postProcessingTasks.filter((s) => s != 'enable_communities'), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 392754d77..54deb12b9 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -244,7 +244,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes extends Omit { } +export interface WikipediaModalTypes extends Omit {} export interface GraphViewModalProps { open: boolean; @@ -791,7 +791,6 @@ export type BasicNode = { propertyTypes: Record; }; - export type GraphPropertiesTableProps = { propertiesWithTypes: { key: string; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 34e1b7263..3eb3a1fe8 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,26 +12,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai-gpt-3.5', - 'openai-gpt-4o', - 'openai-gpt-4o-mini', - 'gemini-1.5-pro', - 'gemini-1.5-flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai-gpt-3.5', + 'openai-gpt-4o', + 'openai-gpt-4o-mini', + 'gemini-1.5-pro', + 'gemini-1.5-flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai-gpt-4o') ? 'openai-gpt-4o' : llms?.includes('gemini-1.5-pro') - ? 'gemini-1.5-pro' - : 'diffbot'; + ? 
'gemini-1.5-pro' + : 'diffbot'; export const chatModeLables = { vector: 'vector', @@ -47,40 +47,40 @@ export const chatModeLables = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables.graph_vector, - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables.graph_vector_fulltext, - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables.entity_vector, - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables.global_vector, - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables.graph_vector, + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using 
full-text indexing on text chunks.', + }, + { + mode: chatModeLables.graph_vector_fulltext, + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: chatModeLables.entity_vector, + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables.global_vector, + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -171,7 +171,7 @@ export const RETRY_OPIONS = [ ]; export const batchSize: number = parseInt(process.env.VITE_BATCH_SIZE ?? '2'); -//Graph Constants +// Graph Constants export const document = `+ [docs]`; export const chunks = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain @@ -279,4 +279,4 @@ export const getDefaultMessage = () => { export const appLabels = { ownSchema: 'Or Define your own Schema', predefinedSchema: 'Select a Pre-defined Schema', -}; \ No newline at end of file +}; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index ae41a19fb..9a5bec5a8 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -21,7 +21,6 @@ import s3logo from '../assets/images/s3logo.png'; import gcslogo from '../assets/images/gcs.webp'; import { chatModeLables } from './Constants'; - // Get the Url export const url = () => { let url = window.location.href.replace('5173', '8000'); @@ -205,7 +204,7 @@ export const filterData = ( graphType: GraphType[], allNodes: ExtendedNode[], allRelationships: Relationship[], - scheme: Scheme, + scheme: Scheme ) => { let filteredNodes: ExtendedNode[] = []; let filteredRelations: Relationship[] = []; @@ -215,9 +214,7 @@ export const filterData = ( ); // 
Only Document + Chunk // const processedEntities = entityTypes.flatMap(item => item.includes(',') ? item.split(',') : item); - if ( - graphType.includes('DocumentChunk') && !graphType.includes('Entities') && !graphType.includes('Communities') - ) { + if (graphType.includes('DocumentChunk') && !graphType.includes('Entities') && !graphType.includes('Communities')) { filteredNodes = allNodes.filter( (node) => (node.labels.includes('Document') && node.properties.fileName) || node.labels.includes('Chunk') ); @@ -230,8 +227,15 @@ export const filterData = ( ); filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk }; // Only Entity - } else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk') && !graphType.includes('Communities')) { - const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__')); + } else if ( + graphType.includes('Entities') && + !graphType.includes('DocumentChunk') && + !graphType.includes('Communities') + ) { + const entityNodes = allNodes.filter( + (node) => + !node.labels.includes('Document') && !node.labels.includes('Chunk') && !node.labels.includes('__Community__') + ); filteredNodes = entityNodes ? entityNodes : []; const nodeIds = new Set(filteredNodes.map((node) => node.id)); filteredRelations = allRelationships.filter( @@ -258,7 +262,7 @@ export const filterData = ( } else if ( graphType.includes('DocumentChunk') && graphType.includes('Entities') && - (!graphType.includes('Communities')) + !graphType.includes('Communities') ) { filteredNodes = allNodes.filter( (node) => @@ -388,7 +392,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? 
batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; @@ -466,24 +470,28 @@ export function isAllowedHost(url: string, allowedHosts: string[]) { export const getCheckboxConditions = (allNodes: ExtendedNode[]) => { const isDocChunk = allNodes.some((n) => n.labels?.includes('Document') || n.labels?.includes('Chunk')); - const isEntity = allNodes.some((n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__')); + const isEntity = allNodes.some( + (n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__') + ); const isCommunity = allNodes.some((n) => n.labels?.includes('__Community__')); return { isDocChunk, isEntity, isCommunity }; }; -export const graphTypeFromNodes = (allNodes:ExtendedNode[])=>{ - const graphType: GraphType[] =[]; +export const graphTypeFromNodes = (allNodes: ExtendedNode[]) => { + const graphType: GraphType[] = []; const hasDocChunk = allNodes.some((n) => n.labels?.includes('Document') || n.labels?.includes('Chunk')); - const hasEntity = allNodes.some((n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__')); + const hasEntity = allNodes.some( + (n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk') && !n.labels?.includes('__Community__') + ); const hasCommunity = allNodes.some((n) => n.labels?.includes('__Community__')); - if(hasDocChunk){ + if (hasDocChunk) { graphType.push('DocumentChunk'); } - if(hasEntity){ + if (hasEntity) { graphType.push('Entities'); } - if(hasCommunity){ + if (hasCommunity) { graphType.push('Communities'); } return graphType; -} \ No newline at end of file +}; From 0cc118d01b1ba73a0efaed7956fa1848da065244 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey 
<156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 10 Oct 2024 19:35:38 +0530 Subject: [PATCH 163/292] Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * labels cahnge for nodes * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/score.py | 1 + .../src/components/ChatBot/ChatModeToggle.tsx | 3 +- .../src/components/ChatBot/Communities.tsx | 1 + .../components/ChatBot/CommunitiesInfo.tsx | 40 +++++++++++++ frontend/src/components/Content.tsx | 13 +++- frontend/src/components/FileTable.tsx | 60 +++++++++++++++---- frontend/src/components/Layout/SideNav.tsx | 2 +- frontend/src/utils/Constants.ts | 2 +- frontend/src/utils/Utils.ts | 2 + 9 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 frontend/src/components/ChatBot/CommunitiesInfo.tsx diff --git a/backend/score.py b/backend/score.py index 920fa00f9..a2ffd033f 100644 --- a/backend/score.py +++ b/backend/score.py @@ -423,6 +423,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber elapsed_time = end - start json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") + # result['elapsed_api_time'] = f'{elapsed_time:.2f}' if int(chunkNumber) == int(totalChunks): return 
create_api_response('Success',data=result, message='Source Node Created Successfully') else: diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 127942c57..01e2d1aa7 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -22,7 +22,7 @@ export default function ChatModeToggle({ }) { const { setchatModes, chatModes, postProcessingTasks, selectedRows } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); - const { isGdsActive } = useCredentials(); + const { isGdsActive, userCredentials } = useCredentials(); useEffect(() => { // If rows are selected, the mode is valid (either vector or graph+vector) @@ -43,6 +43,7 @@ export default function ChatModeToggle({ } } }, [selectedRows.length, chatModes.length]); + const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx index e2092c101..11869d3d4 100644 --- a/frontend/src/components/ChatBot/Communities.tsx +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -4,6 +4,7 @@ import ReactMarkdown from 'react-markdown'; import { CommunitiesProps } from '../../types'; const CommunitiesInfo: FC = ({ loading, communities }) => { + console.log('communities', communities); return ( <> {loading ? 
( diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx new file mode 100644 index 000000000..ad37ceb81 --- /dev/null +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -0,0 +1,40 @@ +import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; +import { FC } from 'react'; +import ReactMarkdown from 'react-markdown'; +import { CommunitiesProps } from '../../types'; + +const CommunitiesInfo: FC = ({ loading, communities }) => { + return ( + <> + {loading ? ( + + + + ) : communities?.length > 0 ? ( +
    +
      + {communities.map((community, index) => ( +
    • +
      + + ID : + {community.id} + + + Score : + {community.score && {community.score}} + + {community.summary} +
      +
    • + ))} +
    +
    + ) : ( + No Communities Found + )} + + ); +}; + +export default CommunitiesInfo; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index f74ca7020..641faf6d0 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -18,7 +18,16 @@ import { postProcessing } from '../services/PostProcessing'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; import { useSearchParams } from 'react-router-dom'; -import { batchSize, buttonCaptions, defaultLLM, largeFileSize, llms, RETRY_OPIONS, tooltips } from '../utils/Constants'; +import { + batchSize, + buttonCaptions, + chatModeLables, + defaultLLM, + largeFileSize, + llms, + RETRY_OPIONS, + tooltips, +} from '../utils/Constants'; import ButtonWithToolTip from './UI/ButtonWithToolTip'; import connectAPI from '../services/ConnectAPI'; import DropdownComponent from './Dropdown'; @@ -96,6 +105,7 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, + setchatMode, } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); @@ -542,6 +552,7 @@ const Content: React.FC = ({ setSelectedNodes([]); setSelectedRels([]); setClearHistoryData(true); + setchatMode(chatModeLables.graph_vector_fulltext); }; const retryHandler = async (filename: string, retryoption: string) => { diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 4b6d4bb7e..4ae9cabc4 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -7,9 +7,11 @@ import { ProgressBar, StatusIndicator, TextLink, + Tip, Typography, + useCopyToClipboard, } from '@neo4j-ndl/react'; -import { forwardRef, useEffect, useImperativeHandle, useMemo, useRef, useState } from 'react'; +import { forwardRef, useContext, 
useEffect, useImperativeHandle, useMemo, useRef, useState } from 'react'; import { useReactTable, getCoreRowModel, @@ -35,7 +37,11 @@ import { } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; -import { ArrowPathIconSolid, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; +import { + ArrowPathIconSolid, + ClipboardDocumentIconOutline, + MagnifyingGlassCircleIconSolid, +} from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; @@ -47,7 +53,9 @@ import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; +import { ThemeWrapperContext } from '../context/ThemeWrapper'; let onlyfortheFirstRender = true; + const FileTable = forwardRef((props, ref) => { const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = @@ -61,6 +69,8 @@ const FileTable = forwardRef((props, ref) => { const [fileSourceFilter, setFileSourceFilter] = useState(''); const [llmtypeFilter, setLLmtypeFilter] = useState(''); const skipPageResetRef = useRef(false); + const [_, copy] = useCopyToClipboard(); + const { colorMode } = useContext(ThemeWrapperContext); const tableRef = useRef(null); @@ -72,6 +82,10 @@ const FileTable = forwardRef((props, ref) => { showErrorToast(`${fileName} Failed to process`); } ); + + const handleCopy = (message: string) => { + copy(message); + }; const columns = useMemo( () => [ { @@ -140,14 +154,18 @@ const FileTable = 
forwardRef((props, ref) => { cell: (info) => { if (info.getValue() != 'Processing') { return ( -
    - - {info.getValue()} - {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && - !isReadOnlyUser && ( + +
    + + + {info.getValue()} + + {(info.getValue() === 'Completed' || + info.getValue() === 'Failed' || + (info.getValue() === 'Cancelled' && !isReadOnlyUser)) && ( ((props, ref) => { )} -
    +
    + + {info.row.original?.status === 'Failed' && ( + + handleCopy(info.row.original?.errorMessage ?? '')} + > + + + + )} + ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { return ( @@ -510,7 +546,7 @@ const FileTable = forwardRef((props, ref) => { footer: (info) => info.column.id, }), ], - [filesData.length, statusFilter, filetypeFilter, llmtypeFilter, fileSourceFilter, isReadOnlyUser] + [filesData.length, statusFilter, filetypeFilter, llmtypeFilter, fileSourceFilter, isReadOnlyUser, colorMode] ); const table = useReactTable({ diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 2472d6e76..12c815757 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -118,7 +118,7 @@ const SideNav: React.FC = ({ + } diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 3eb3a1fe8..c8c464ead 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -40,7 +40,7 @@ export const chatModeLables = { fulltext: 'fulltext', graph_vector_fulltext: 'graph+vector+fulltext', entity_vector: 'entity search+vector', - unavailableChatMode: 'Chat mode is unavailable when rows are selected', + unavailableChatMode: 'Chat mode is unavailable when files are selected', selected: 'Selected', global_vector: 'global search+vector+fulltext', }; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 9a5bec5a8..4aca97286 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -245,6 +245,7 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; + console.log('labels', entityNodes); // Only Communities } else if ( graphType.includes('Communities') && @@ -332,6 +333,7 @@ export const filterData = ( filteredNodes = allNodes; filteredRelations = 
allRelationships; filteredScheme = scheme; + console.log('entity', filteredScheme); } return { filteredNodes, filteredRelations, filteredScheme }; }; From fcb9ab570ce75301412354425021337d6871b225 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:26:18 +0000 Subject: [PATCH 164/292] llm name changes --- frontend/src/utils/Constants.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index c8c464ead..169590c4a 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,12 +12,12 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai-gpt-3.5', - 'openai-gpt-4o', - 'openai-gpt-4o-mini', - 'gemini-1.5-pro', - 'gemini-1.5-flash', + 'diffbot', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', 'azure_ai_gpt_35', 'azure_ai_gpt_4o', 'ollama_llama3', From f5054888d3db335769ba3af5a9bf185526da5026 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:32:01 +0000 Subject: [PATCH 165/292] build fix --- frontend/src/components/ChatBot/ChatModeToggle.tsx | 2 +- frontend/src/components/Content.tsx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 01e2d1aa7..0317661f6 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -22,7 +22,7 @@ export default function ChatModeToggle({ }) { const { setchatModes, chatModes, postProcessingTasks, selectedRows } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); - const { isGdsActive, 
userCredentials } = useCredentials(); + const { isGdsActive } = useCredentials(); useEffect(() => { // If rows are selected, the mode is valid (either vector or graph+vector) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 641faf6d0..95321fc0e 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -105,7 +105,7 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, - setchatMode, + setchatModes } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); @@ -552,7 +552,7 @@ const Content: React.FC = ({ setSelectedNodes([]); setSelectedRels([]); setClearHistoryData(true); - setchatMode(chatModeLables.graph_vector_fulltext); + setchatModes([chatModeLables.graph_vector_fulltext]); }; const retryHandler = async (filename: string, retryoption: string) => { From 37de220112e0f40c6139498b143cbd77853e4df1 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 07:43:39 +0000 Subject: [PATCH 166/292] default mode fix --- frontend/src/utils/Constants.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 169590c4a..7ca380bac 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -27,10 +27,10 @@ export const llms = 'bedrock_claude_3_5_sonnet', ]; -export const defaultLLM = llms?.includes('openai-gpt-4o') - ? 'openai-gpt-4o' - : llms?.includes('gemini-1.5-pro') - ? 'gemini-1.5-pro' +export const defaultLLM = llms?.includes('openai-gpt_4o') + ? 'openai-gpt_4o' + : llms?.includes('gemini-1.5_pro') + ? 
'gemini-1.5_pro' : 'diffbot'; export const chatModeLables = { From a5382262e9608fbe5c67b3588bf887e2471e1a08 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Fri, 11 Oct 2024 08:14:04 +0000 Subject: [PATCH 167/292] ragas model names update --- backend/score.py | 32 ++++++++------- backend/src/ragas_eval.py | 82 +++++++-------------------------------- 2 files changed, 33 insertions(+), 81 deletions(-) diff --git a/backend/score.py b/backend/score.py index a2ffd033f..22264b635 100644 --- a/backend/score.py +++ b/backend/score.py @@ -712,20 +712,24 @@ async def retry_processing(uri=Form(), userName=Form(), password=Form(), databas gc.collect() @app.post('/metric') -async def calculate_metric(question=Form(),context=Form(),answer=Form(),model=Form()): - try: - result = await asyncio.to_thread(get_ragas_metrics,question,context,answer,model) - if result is None: - return create_api_response('Failed', message='Failed to calculate metrics.',error="Ragas evaluation returned null") - return create_api_response('Success',data=result,message=f"Status set to Reprocess for filename : {result}") - except Exception as e: - job_status = "Failed" - message="Error while calculating evaluation metrics" - error_message = str(e) - logging.exception(f'{error_message}') - return create_api_response(job_status, message=message, error=error_message) - finally: - gc.collect() +async def calculate_metric(question=Form(), context=Form(), answer=Form(), model=Form()): + try: + result = await asyncio.to_thread(get_ragas_metrics, question, context, answer, model) + if result is None or "error" in result: + return create_api_response( + 'Failed', + message='Failed to calculate evaluation metrics.', + error=result.get("error", "Ragas evaluation returned null") + ) + return create_api_response('Success', data=result) + except Exception as e: + job_status = "Failed" + message = "Error while calculating evaluation metrics" + error_message = str(e) + logging.exception(f'{error_message}') + return 
create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index 940a809dc..e177b6d61 100644 --- a/backend/src/ragas_eval.py +++ b/backend/src/ragas_eval.py @@ -23,19 +23,13 @@ load_dotenv() -# Constants for clarity and maintainability RAGAS_MODEL_VERSIONS = { - "openai-gpt-3.5": "gpt-3.5-turbo-16k", - "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-002", - "gemini-1.5-flash": "gemini-1.5-flash-002", - "openai-gpt-4": "gpt-4-turbo-2024-04-09", - "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", - "openai-gpt-4o": "gpt-4o-mini-2024-07-18", - "diffbot": "gpt-4-turbo-2024-04-09", - "groq-llama3": "groq_llama3_70b", + "openai_gpt_3.5": "gpt-3.5-turbo-16k", + "openai_gpt_4": "gpt-4-turbo-2024-04-09", + "openai_gpt_4o_mini": "gpt-4o-mini-2024-07-18", + "openai_gpt_4o": "gpt-4o-mini-2024-07-18", + "groq_llama3_70b": "groq_llama3_70b", } - EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL") EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL) @@ -46,68 +40,16 @@ def get_ragas_llm(model: str) -> Tuple[object, str]: env_value = os.environ.get(env_key) logging.info(f"Loading model configuration: {env_key}") try: - if "gemini" in model: - credentials, project_id = google.auth.default() - model_name = RAGAS_MODEL_VERSIONS[model] - llm = ChatVertexAI( - model_name=model_name, - credentials=credentials, - project=project_id, - temperature=0, - safety_settings={ - #setting safety to NONE for all categories. 
Consider reviewing this for production systems - HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, - }, - ) - elif "openai" in model: + if "openai" in model: model_name = RAGAS_MODEL_VERSIONS[model] llm = ChatOpenAI( api_key=os.environ.get("OPENAI_API_KEY"), model=model_name, temperature=0 ) - - elif "azure" in model: - model_name, api_endpoint, api_key, api_version = env_value.split(",") - llm = AzureChatOpenAI( - api_key=api_key, - azure_endpoint=api_endpoint, - azure_deployment=model_name, - api_version=api_version, - temperature=0, - ) - elif "anthropic" in model: - model_name, api_key = env_value.split(",") - llm = ChatAnthropic(api_key=api_key, model=model_name, temperature=0) - elif "fireworks" in model: - model_name, api_key = env_value.split(",") - llm = ChatFireworks(api_key=api_key, model=model_name) elif "groq" in model: model_name, base_url, api_key = env_value.split(",") llm = ChatGroq(api_key=api_key, model_name=model_name, temperature=0) - elif "bedrock" in model: - model_name, aws_access_key, aws_secret_key, region_name = env_value.split(",") - bedrock_client = boto3.client( - service_name="bedrock-runtime", - region_name=region_name, - aws_access_key_id=aws_access_key, - aws_secret_access_key=aws_secret_key, - ) - llm = ChatBedrock( - client=bedrock_client, model_id=model_name, model_kwargs=dict(temperature=0) - ) - elif "ollama" in model: - model_name, base_url = env_value.split(",") - llm = ChatOllama(base_url=base_url, model=model_name) - elif "diffbot" in model: - llm = DiffbotGraphTransformer( - diffbot_api_key=os.environ.get("DIFFBOT_API_KEY"), - extract_types=["entities", "facts"], - ) else: - raise 
ValueError(f"Unsupported model: {model}") + raise ValueError(f"Unsupported model for evaluation: {model}") logging.info(f"Model loaded - Model Version: {model}") return llm, model_name @@ -145,6 +87,12 @@ def get_ragas_metrics( end_time = time.time() logging.info(f"Evaluation completed in: {end_time - start_time:.2f} seconds") return score_dict + except ValueError as e: + if "Unsupported model for evaluation" in str(e): + logging.error(f"Unsupported model error: {e}") + return {"error": str(e)} # Return the specific error message as a dictionary + logging.exception(f"ValueError during metrics evaluation: {e}") + return {"error": str(e)} except Exception as e: - logging.exception(f"Error during metrics evaluation: {e}") - return None \ No newline at end of file + logging.exception(f"Error during metrics evaluation: {e}") + return {"error": str(e)} From 784caa6cdafdc7246710a8ae20704632d0fe4925 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:37:01 +0000 Subject: [PATCH 168/292] lint fixes --- .../src/components/ChatBot/ChatInfoModal.tsx | 22 +- .../src/components/ChatBot/ChatModeToggle.tsx | 2 +- frontend/src/components/ChatBot/Chatbot.tsx | 461 +++++++++--------- frontend/src/components/Content.tsx | 2 +- .../Deduplication/index.tsx | 4 +- .../SelectedJobList.tsx | 4 +- frontend/src/utils/Constants.ts | 10 +- frontend/src/utils/Utils.ts | 3 +- 8 files changed, 261 insertions(+), 247 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index c666d5206..31db9086e 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -71,7 +71,7 @@ const ChatInfoModal: React.FC = ({ const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); - const [showMetricsTable, 
setShowMetricsTable] = useState(false); + const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); const actions: CypherCodeBlockProps['actions'] = useMemo( () => [ @@ -178,7 +178,7 @@ const ChatInfoModal: React.FC = ({ } () => { setcopiedText(false); - setShowMetricsTable(false); + toggleMetricsLoading(); }; }, [nodeDetails, mode, error]); @@ -263,10 +263,22 @@ const ChatInfoModal: React.FC = ({ detailed scores for this interaction. These scores help us continuously improve the accuracy and helpfulness of our chatbots. + + Faithfulness: Determines How accurately the answer reflects the provided information + + + Answer Relevancy: Determines How well the answer addresses the user's question. + + + Context Utilization: Determines How effectively the system uses the retrieved information to answer the + question. + {showMetricsTable && } - + {!metricDetails && ( + + )} diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 0317661f6..df03a1083 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -43,7 +43,7 @@ export default function ChatModeToggle({ } } }, [selectedRows.length, chatModes.length]); - + const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 9e3ba07f6..712286f1a 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -245,7 +245,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? 
{ ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -260,7 +260,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -269,15 +269,15 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { - ...msg, - modes: { - ...msg.modes, - [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, - }, - } - : msg + ...msg, + modes: { + ...msg.modes, + [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, + }, + } + : msg) ) ); } @@ -290,19 +290,19 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { - ...msg, - isLoading: false, - isTyping: false, - modes: { - [chatModes[0]]: { - message: 'An error occurred while processing your request.', - error: error.message, + ...msg, + isLoading: false, + isTyping: false, + modes: { + [chatModes[0]]: { + message: 'An error occurred while processing your request.', + error: error.message, + }, }, - }, - } - : msg + } + : msg) ) ); } @@ -404,105 +404,83 @@ const Chatbot: FC = (props) => { const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); if (downloadLinkRef && downloadLinkRef.current) { downloadLinkRef.current.href = URL.createObjectURL(textFile); - downloadLinkRef.current.download = 'data.json'; + downloadLinkRef.current.download = 'graph-builder-chat-details.json'; downloadLinkRef.current.click(); } }, []); -return ( -
    -
    - -
    - {listMessages.map((chat, index) => { - const messagechatModes = Object.keys(chat.modes); - return ( -
    -
    - {chat.user === 'chatbot' ? ( - - ) : ( - - )} -
    - +
    + +
    + {listMessages.map((chat, index) => { + const messagechatModes = Object.keys(chat.modes); + return ( +
    -
    - - {chat.modes[chat.currentMode]?.message || ''} - +
    + {chat.user === 'chatbot' ? ( + + ) : ( + + )}
    -
    -
    - - {chat.datetime} - + +
    + + {chat.modes[chat.currentMode]?.message || ''} +
    - {chat.user === 'chatbot' && - chat.id !== 2 && - !chat.isLoading && - !chat.isTyping && - (!isFullScreen ? ( - 1 ? 'space-between' : 'unset'} - alignItems='center' - > - - {messagechatModes.length > 1 && ( - { - const modes = Object.keys(chat.modes); - const modeswtich = modes[index]; - handleSwitchMode(chat.id, modeswtich); - }} - isFullScreen={false} - currentModeIndex={messagechatModes.indexOf(chat.currentMode)} - modescount={messagechatModes.length} - /> - )} - - ) : ( - - +
    +
    + + {chat.datetime} + +
    + {chat.user === 'chatbot' && + chat.id !== 2 && + !chat.isLoading && + !chat.isTyping && + (!isFullScreen ? ( + 1 ? 'space-between' : 'unset'} + alignItems='center' + > - - {messagechatModes.length > 1 && ( )} - - - ))} -
    -
    -
    - ); - })} -
    - -
    -
    -
    - + ) : ( + + + + + + {messagechatModes.length > 1 && ( + { + const modes = Object.keys(chat.modes); + const modeswtich = modes[index]; + handleSwitchMode(chat.id, modeswtich); + }} + isFullScreen={isFullScreen} + currentModeIndex={messagechatModes.indexOf(chat.currentMode)} + modescount={messagechatModes.length} + /> + )} + + + ))} +
    + +
    + ); + })} +
    +
    +
    +
    + + - + + {buttonCaptions.ask}{' '} + {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`} + + +
    + }> + setShowInfoModal(false)} + open={showInfoModal} > - {buttonCaptions.ask}{' '} - {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`} - - +
    + { + downloadClickHandler({ + chatResponse: activeChat, + chunks, + metricDetails, + communities, + responseTime, + entities: infoEntities, + nodes, + tokensUsed, + model, + }); + }} + > + + + "" + + + setShowInfoModal(false)} + > + + +
    + +
    +
    - }> - setShowInfoModal(false)} - open={showInfoModal} - > -
    - { - downloadClickHandler({ - chatResponse: activeChat, - chunks, - metricDetails, - communities, - responseTime, - entities: infoEntities, - nodes, - tokensUsed, - model, - }); - }} - > - - - "" - - - setShowInfoModal(false)} - > - - -
    - -
    -
    -
    -); + ); }; export default Chatbot; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 95321fc0e..3eeace857 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -105,7 +105,7 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, - setchatModes + setchatModes, } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 36fcff3d1..f5a021e30 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -80,12 +80,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index a7621f2b6..1cb3c7310 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - isGdsActive + (isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities'), + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 7ca380bac..1e8c67cef 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,7 +12,7 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', + 'diffbot', 'openai_gpt_3.5', 'openai_gpt_4o', 'openai_gpt_4o_mini', @@ -27,10 +27,10 @@ export const llms = 'bedrock_claude_3_5_sonnet', ]; -export const defaultLLM = llms?.includes('openai-gpt_4o') - ? 'openai-gpt_4o' - : llms?.includes('gemini-1.5_pro') - ? 'gemini-1.5_pro' +export const defaultLLM = llms?.includes('openai_gpt_4o') + ? 'openai_gpt_4o' + : llms?.includes('gemini_1.5_pro') + ? 'gemini_1.5_pro' : 'diffbot'; export const chatModeLables = { diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 4aca97286..d5e2e0f97 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -245,7 +245,6 @@ export const filterData = ( nodeIds.has(rel.to) ); filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme; - console.log('labels', entityNodes); // Only Communities } else if ( graphType.includes('Communities') && @@ -394,7 +393,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - prev === batchSize ? batchSize - 1 : prev + 1; + (prev === batchSize ? 
batchSize - 1 : prev + 1); export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From b814f71ca1710f6615fc99f22740d2763f6b7333 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:58:36 +0000 Subject: [PATCH 169/292] Chunk Entities API condition --- .../src/components/ChatBot/ChatInfoModal.tsx | 43 ++++++++++++------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 31db9086e..cfd7e7c2c 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -97,7 +97,10 @@ const ChatInfoModal: React.FC = ({ ); useEffect(() => { - if (mode != chatModeLables.graph || error?.trim() !== '') { + if ( + (mode != chatModeLables.graph || error?.trim() !== '') && + (!nodes.length || !infoEntities.length || !chunks.length) + ) { (async () => { toggleInfoLoading(); try { @@ -258,21 +261,29 @@ const ChatInfoModal: React.FC = ({ - - We use several key metrics to assess the quality of our chat responses. Click the button below to view - detailed scores for this interaction. These scores help us continuously improve the accuracy and - helpfulness of our chatbots. - - - Faithfulness: Determines How accurately the answer reflects the provided information - - - Answer Relevancy: Determines How well the answer addresses the user's question. - - - Context Utilization: Determines How effectively the system uses the retrieved information to answer the - question. - + + + + We use several key metrics to assess the quality of our chat responses. Click the button below to view + detailed scores for this interaction. 
These scores help us continuously improve the accuracy and + helpfulness of our chatbots. + + + + + Faithfulness: Determines How accurately the answer reflects the + provided information + + + Answer Relevancy: Determines How well the answer addresses the + user's question. + + + Context Utilization: Determines How effectively the system uses the + retrieved information to answer thequestion. + + + {showMetricsTable && } {!metricDetails && ( + )} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 1e8c67cef..971227724 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -32,7 +32,7 @@ export const defaultLLM = llms?.includes('openai_gpt_4o') : llms?.includes('gemini_1.5_pro') ? 'gemini_1.5_pro' : 'diffbot'; - +export const supportedLLmsForRagas = ['openai_gpt_3.5', 'openai_gpt_4o', 'openai_gpt_4o_mini', 'groq_llama3_70b']; export const chatModeLables = { vector: 'vector', graph: 'graph', From c5a3dbf722fe5b92d39a6fcf459b707ddc1a1f9b Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:14:45 +0000 Subject: [PATCH 171/292] removed unused imports --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index f151ecc9c..be74580ce 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -8,7 +8,6 @@ import { useCopyToClipboard, Banner, useMediaQuery, - Button, } from '@neo4j-ndl/react'; import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; From acdc886b6209ab1fadf2112b1df7bcc1e516c793 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:41:38 +0000 Subject: [PATCH 
172/292] multimode fix when we get error response --- frontend/src/components/ChatBot/Chatbot.tsx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 712286f1a..eadd14831 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -232,13 +232,13 @@ const Chatbot: FC = (props) => { total_tokens: response.info.total_tokens, response_time: response.info.response_time, cypher_query: response.info.cypher_query, - graphonly_entities: response.info.context, - entities: response.info.entities, + graphonly_entities: response.info.context??[], + entities: response.info.entities??[], nodeDetails: response.info.nodedetails, error: response.info.error, - metric_question: response.info.metric_details.question, - metric_answer: response.info.metric_details.answer, - metric_contexts: response.info.metric_details.contexts, + metric_question: response.info?.metric_details?.question??'', + metric_answer: response.info?.metric_details?.answer??'', + metric_contexts: response.info?.metric_details?.contexts??'', }; if (index === 0) { simulateTypingEffect(chatbotMessageId, responseMode, mode, responseMode.message); From 2734c4af0cde5f949c65cadd48c9108aa82bb21d Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 11 Oct 2024 13:40:25 +0000 Subject: [PATCH 173/292] mode changes for score display --- .../src/components/ChatBot/ChatInfoModal.tsx | 10 +++--- frontend/src/components/ChatBot/ChunkInfo.tsx | 33 +++++++++++++------ .../components/ChatBot/CommunitiesInfo.tsx | 9 ++--- frontend/src/types.ts | 2 ++ 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index c666d5206..d5347eab7 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ 
b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -234,9 +234,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> @@ -278,7 +278,7 @@ const ChatInfoModal: React.FC = ({ /> - + = ({ {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? ( - + ) : ( <> diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 350b41b98..cbb94be21 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -9,8 +9,9 @@ import s3logo from '../../assets/images/s3logo.png'; import ReactMarkdown from 'react-markdown'; import { generateYouTubeLink, getLogo, isAllowedHost } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { chatModeLables } from '../../utils/Constants'; -const ChunkInfo: FC = ({ loading, chunks }) => { +const ChunkInfo: FC = ({ loading, chunks, mode }) => { const themeUtils = useContext(ThemeWrapperContext); return ( @@ -26,16 +27,18 @@ const ChunkInfo: FC = ({ loading, chunks }) => {
  • {chunk?.page_number ? ( <> -
    - - + + {chunk?.fileName}
    - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && chunk?.start_time ? ( <> @@ -50,7 +53,9 @@ const ChunkInfo: FC = ({ loading, chunks }) => {
  • - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( <> @@ -58,7 +63,9 @@ const ChunkInfo: FC = ({ loading, chunks }) => { {chunk?.fileName}
    - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( <> @@ -66,7 +73,9 @@ const ChunkInfo: FC = ({ loading, chunks }) => { {chunk?.fileName}
    - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( <> @@ -74,7 +83,9 @@ const ChunkInfo: FC = ({ loading, chunks }) => { {chunk?.fileName}
    - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && !chunk?.url.startsWith('s3://') && @@ -86,7 +97,9 @@ const ChunkInfo: FC = ({ loading, chunks }) => { {chunk?.url}
    - Similarity Score: {chunk?.score} + {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : ( <> diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index ad37ceb81..ec1017961 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -2,8 +2,9 @@ import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; import { FC } from 'react'; import ReactMarkdown from 'react-markdown'; import { CommunitiesProps } from '../../types'; +import { chatModeLables } from '../../utils/Constants'; -const CommunitiesInfo: FC = ({ loading, communities }) => { +const CommunitiesInfo: FC = ({ loading, communities, mode }) => { return ( <> {loading ? ( @@ -20,10 +21,10 @@ const CommunitiesInfo: FC = ({ loading, communities }) => { ID : {community.id} - + {mode === chatModeLables.global_vector && community.score && ( Score : - {community.score && {community.score}} - + {community.score} + )} {community.summary}
    diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 54deb12b9..d16a61beb 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -698,6 +698,7 @@ export type SourcesProps = { export type ChunkProps = { loading: boolean; chunks: Chunk[]; + mode:string; }; export type EntitiesProps = { @@ -710,6 +711,7 @@ export type EntitiesProps = { export type CommunitiesProps = { loading: boolean; communities: Community[]; + mode:string; }; export interface entity { From a12a6ab776a0f98c0a6d546e76c0f22aa58f9911 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 15 Oct 2024 06:36:04 +0000 Subject: [PATCH 174/292] fix: Fixed the details state handling between multiple chats feature: Added the warning banner If selected llm model is not supported for raga's evaluation --- .../src/components/ChatBot/ChatInfoModal.tsx | 21 ++++---- frontend/src/components/ChatBot/Chatbot.tsx | 22 +++++--- frontend/src/components/ChatBot/ChunkInfo.tsx | 52 ++++++++++++------- .../components/ChatBot/CommonChatActions.tsx | 6 ++- .../components/ChatBot/CommunitiesInfo.tsx | 10 ++-- .../src/components/Graph/GraphViewModal.tsx | 1 - frontend/src/types.ts | 4 +- 7 files changed, 71 insertions(+), 45 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index b4fcb77e7..c9d89d11c 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -8,6 +8,7 @@ import { useCopyToClipboard, Banner, useMediaQuery, + Button, } from '@neo4j-ndl/react'; import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; @@ -28,7 +29,6 @@ import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; import { Stack } from '@mui/material'; -import 
ButtonWithToolTip from '../UI/ButtonWithToolTip'; const ChatInfoModal: React.FC = ({ sources, @@ -237,9 +237,9 @@ const ChatInfoModal: React.FC = ({ {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( Top Entities used ) : ( <> @@ -262,11 +262,15 @@ const ChatInfoModal: React.FC = ({ + {!supportedLLmsForRagas.includes(metricmodel) && ( + LLM Model Not Supported ,Please Choose Different Model + )} We use several key metrics to assess the quality of our chat responses. Click the button below to view detailed scores for this interaction. These scores help us continuously improve the accuracy and - helpfulness of our chatbots. + helpfulness of our chatbots.This usually takes about 20 seconds. + You'll see detailed scores shortly. @@ -286,15 +290,14 @@ const ChatInfoModal: React.FC = ({ {showMetricsTable && } {!metricDetails && ( - View Detailed Metrics - + )} @@ -307,7 +310,7 @@ const ChatInfoModal: React.FC = ({ /> - + = (props) => { const [infoLoading, toggleInfoLoading] = useReducer((s) => !s, false); const [metricsLoading, toggleMetricsLoading] = useReducer((s) => !s, false); const downloadLinkRef = useRef(null); - const [activeChat, setActiveChat] = useState(); + const [activeChat, setActiveChat] = useState(null); const [_, copy] = useCopyToClipboard(); const { speak, cancel, speaking } = useSpeechSynthesis({ @@ -232,13 +232,13 @@ const Chatbot: FC = (props) => { total_tokens: response.info.total_tokens, response_time: response.info.response_time, cypher_query: response.info.cypher_query, - graphonly_entities: response.info.context??[], - entities: response.info.entities??[], + graphonly_entities: response.info.context ?? 
[], + entities: response.info.entities ?? [], nodeDetails: response.info.nodedetails, error: response.info.error, - metric_question: response.info?.metric_details?.question??'', - metric_answer: response.info?.metric_details?.answer??'', - metric_contexts: response.info?.metric_details?.contexts??'', + metric_question: response.info?.metric_details?.question ?? '', + metric_answer: response.info?.metric_details?.answer ?? '', + metric_contexts: response.info?.metric_details?.contexts ?? '', }; if (index === 0) { simulateTypingEffect(chatbotMessageId, responseMode, mode, responseMode.message); @@ -374,7 +374,7 @@ const Chatbot: FC = (props) => { } }; - const detailsHandler = useCallback((chat: Messages) => { + const detailsHandler = useCallback((chat: Messages, previousActiveChat: Messages | null) => { const currentMode = chat.modes[chat.currentMode]; setModelModal(currentMode.model ?? ''); setSourcesModal(currentMode.sources ?? []); @@ -391,6 +391,12 @@ const Chatbot: FC = (props) => { setMetricContext(currentMode.metric_contexts ?? ''); setMetricAnswer(currentMode.metric_answer ?? ''); setActiveChat(chat); + if (previousActiveChat != null && chat.id != previousActiveChat?.id) { + setNodes([]); + setChunks([]); + setInfoEntities([]); + setMetricDetails(null); + } }, []); const speechHandler = useCallback((chat: Messages) => { @@ -487,6 +493,7 @@ const Chatbot: FC = (props) => { detailsHandler={detailsHandler} listMessages={listMessages} speechHandler={speechHandler} + activeChat={activeChat} > {messagechatModes.length > 1 && ( = (props) => { detailsHandler={detailsHandler} listMessages={listMessages} speechHandler={speechHandler} + activeChat={activeChat} >
    diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index cbb94be21..05fa229a1 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -28,17 +28,19 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.page_number ? ( <>
    - - + {chunk?.fileName}
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && chunk?.start_time ? ( <> @@ -53,9 +55,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( <> @@ -63,9 +67,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( <> @@ -73,9 +79,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( <> @@ -83,9 +91,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : chunk?.url && !chunk?.url.startsWith('s3://') && @@ -97,9 +107,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.url}
    - {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} - )} + {mode !== chatModeLables.global_vector && + mode !== chatModeLables.entity_vector && + mode !== chatModeLables.graph && ( + Similarity Score: {chunk?.score} + )} ) : ( <> diff --git a/frontend/src/components/ChatBot/CommonChatActions.tsx b/frontend/src/components/ChatBot/CommonChatActions.tsx index 07f04bc8f..80d904f6a 100644 --- a/frontend/src/components/ChatBot/CommonChatActions.tsx +++ b/frontend/src/components/ChatBot/CommonChatActions.tsx @@ -10,9 +10,11 @@ export default function CommonActions({ speechHandler, copyHandler, listMessages, + activeChat, }: { chat: Messages; - detailsHandler: (chat: Messages) => void; + activeChat: Messages | null; + detailsHandler: (chat: Messages, activeChat: Messages | null) => void; speechHandler: (chat: Messages) => void; copyHandler: (message: string, id: number) => void; listMessages: Messages[]; @@ -27,7 +29,7 @@ export default function CommonActions({ text='Retrieval Information' label='Retrieval Information' disabled={chat.isTyping || chat.isLoading} - onClick={() => detailsHandler(chat)} + onClick={() => detailsHandler(chat, activeChat)} > {buttonCaptions.details} diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index ec1017961..088e4e370 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -21,10 +21,12 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ID : {community.id} - {mode === chatModeLables.global_vector && community.score && ( - Score : - {community.score} - )} + {mode === chatModeLables.global_vector && community.score && ( + + Score : + {community.score} + + )} {community.summary}
    diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index dddfb4175..33a46c1c1 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -74,7 +74,6 @@ const GraphViewModal: React.FunctionComponent = ({ {} ); }; - console.log('graphType', graphType); // Unmounting the component useEffect(() => { const timeoutId = setTimeout(() => { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d16a61beb..0f8f913d3 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -698,7 +698,7 @@ export type SourcesProps = { export type ChunkProps = { loading: boolean; chunks: Chunk[]; - mode:string; + mode: string; }; export type EntitiesProps = { @@ -711,7 +711,7 @@ export type EntitiesProps = { export type CommunitiesProps = { loading: boolean; communities: Community[]; - mode:string; + mode: string; }; export interface entity { From ba091a0f9ecd370b2f1864ed401b330ea26e5171 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 15 Oct 2024 07:23:33 +0000 Subject: [PATCH 175/292] Fix: Entity Mode Width Fix --- frontend/src/components/ChatBot/Chatbot.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 14e2daffe..b3a122433 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -406,6 +406,7 @@ const Chatbot: FC = (props) => { handleSpeak(chat.modes[chat.currentMode]?.message, chat.id); } }, []); + const downloadClickHandler = useCallback(function downloadClickHandler(JsonData: Type) { const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); if (downloadLinkRef && downloadLinkRef.current) { @@ -580,6 +581,7 @@ const Chatbot: FC = (props) => { }} onClose={() => setShowInfoModal(false)} 
open={showInfoModal} + size={activeChat?.currentMode === chatModeLables.entity_vector ? 'large' : 'medium'} >
    Date: Tue, 15 Oct 2024 14:07:40 +0530 Subject: [PATCH 176/292] diffbot fix for async (#797) --- backend/src/llm.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/src/llm.py b/backend/src/llm.py index f43be2a67..93ee0f08f 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -150,7 +150,6 @@ async def get_graph_document_list( ): futures = [] graph_document_list = [] - if "diffbot_api_key" in dir(llm): llm_transformer = llm else: @@ -181,7 +180,11 @@ async def get_graph_document_list( # for i, future in enumerate(concurrent.futures.as_completed(futures)): # graph_document = future.result() # graph_document_list.append(graph_document[0]) - graph_document_list = await llm_transformer.aconvert_to_graph_documents(combined_chunk_document_list) + + if isinstance(llm,DiffbotGraphTransformer): + graph_document_list = llm_transformer.convert_to_graph_documents(combined_chunk_document_list) + else: + graph_document_list = await llm_transformer.aconvert_to_graph_documents(combined_chunk_document_list) return graph_document_list From 821b0f438734dc61f55d001988cec6ab07003de4 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Tue, 15 Oct 2024 14:43:27 +0530 Subject: [PATCH 177/292] Minor changes (#798) * added congig variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optinal to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/src/QA_integration.py | 3 ++- backend/src/post_processing.py | 9 ++++++--- backend/src/shared/constants.py | 4 ++-- frontend/src/components/ChatBot/ChatModeToggle.tsx | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 30375a9d9..cf7c74f6d 100644 --- 
a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -402,7 +402,8 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshol def setup_chat(model, graph, document_names, chat_mode_settings): start_time = time.time() try: - model = "openai-gpt-4o" if model == "diffbot" else model + if model == "diffbot": + model = os.getenv('DEFAULT_DIFFBOT_CHAT_MODEL') llm, model_name = get_llm(model=model) logging.info(f"Model called in chat: {model} (version: {model_name})") diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 22af50b76..7746df3d0 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -93,9 +93,12 @@ def create_fulltext(driver,type): for label in FILTER_LABELS: if label in labels: labels.remove(label) - - labels_str = ":" + "|".join([f"`{label}`" for label in labels]) - logging.info(f"Fetched labels in {time.time() - start_step:.2f} seconds.") + if labels: + labels_str = ":" + "|".join([f"`{label}`" for label in labels]) + logging.info(f"Fetched labels in {time.time() - start_step:.2f} seconds.") + else: + logging.info("Full text index is not created as labels are empty") + return except Exception as e: logging.error(f"Failed to fetch labels: {e}") return diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 20bbffa3f..33b27b20a 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -382,7 +382,7 @@ collect {{ UNWIND nodes AS n - MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) + OPTIONAL MATCH (n)-[:IN_COMMUNITY]->(c:__Community__) WITH c, c.community_rank AS rank, c.weight AS weight RETURN c ORDER BY rank, weight DESC @@ -477,7 +477,7 @@ } ] AS chunks, [ - community IN communities | + community IN communities WHERE community IS NOT NULL | community { .*, embedding: null diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 
df03a1083..cd7b4b796 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -48,7 +48,7 @@ export default function ChatModeToggle({ return isGdsActive && isCommunityAllowed ? AvailableModes : AvailableModes?.filter( - (m) => !m.mode.includes(chatModeLables.entity_vector) && !m.mode.includes(chatModeLables.global_vector) + (m) => !m.mode.includes(chatModeLables.global_vector) ); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { From 702ebf761673eab390be85747731d49a2f914858 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:32:19 +0000 Subject: [PATCH 178/292] New: Added the supported llm models for ragas evaluation --- .../src/components/ChatBot/ChatInfoModal.tsx | 23 +++++++++++++++---- frontend/src/components/ChatBot/Chatbot.tsx | 2 +- frontend/src/components/Dropdown.tsx | 11 ++------- frontend/src/components/FileTable.tsx | 13 ++--------- frontend/src/utils/Utils.ts | 1 + 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index c9d89d11c..7401f0fa5 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -29,6 +29,7 @@ import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; import { Stack } from '@mui/material'; +import { capitalizeWithUnderscore } from '../../utils/Utils'; const ChatInfoModal: React.FC = ({ sources, @@ -263,14 +264,28 @@ const ChatInfoModal: React.FC = ({ {!supportedLLmsForRagas.includes(metricmodel) && ( - LLM Model Not Supported ,Please Choose Different Model + + Currently ragas evaluation works on{' '} + {supportedLLmsForRagas.map((s, idx) => ( + + {capitalizeWithUnderscore(s) + (idx != 
supportedLLmsForRagas.length - 1 ? ',' : '')} + + ))} + . + + } + > )} We use several key metrics to assess the quality of our chat responses. Click the button below to view - detailed scores for this interaction. These scores help us continuously improve the accuracy and - helpfulness of our chatbots.This usually takes about 20 seconds. - You'll see detailed scores shortly. + detailed scores for this interaction using ragas framework. These + scores help us continuously improve the accuracy and helpfulness of our chatbots.This usually takes + about 20 seconds . You'll see detailed scores shortly. diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index b3a122433..bb50cd54f 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -406,7 +406,7 @@ const Chatbot: FC = (props) => { handleSpeak(chat.modes[chat.currentMode]?.message, chat.id); } }, []); - + const downloadClickHandler = useCallback(function downloadClickHandler(JsonData: Type) { const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); if (downloadLinkRef && downloadLinkRef.current) { diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index cdf658824..48188f38b 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,7 +1,7 @@ import { Dropdown, Tip } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; import { memo, useMemo, useReducer } from 'react'; -import { capitalize } from '../utils/Utils'; +import { capitalize, capitalizeWithUnderscore } from '../utils/Utils'; const DropdownComponent: React.FC = ({ options, @@ -31,14 +31,7 @@ const DropdownComponent: React.FC = ({ onChange: handleChange, options: allOptions?.map((option) => { const label = - typeof option === 'string' - ? (option.includes('LLM_MODEL_CONFIG_') - ? 
capitalize(option.split('LLM_MODEL_CONFIG_').at(-1) as string) - : capitalize(option) - ) - .split('_') - .join(' ') - : capitalize(option.label); + typeof option === 'string' ? capitalizeWithUnderscore(option) : capitalize(option.label); const value = typeof option === 'string' ? option : option.value; return { label, diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 4ae9cabc4..e5aa5ada4 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -29,11 +29,11 @@ import { getSourceNodes } from '../services/GetFiles'; import { v4 as uuidv4 } from 'uuid'; import { statusCheck, - capitalize, isFileCompleted, calculateProcessedCount, getFileSourceStatus, isProcessingFileValid, + capitalizeWithUnderscore, } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, ChildRef } from '../types'; import { useCredentials } from '../context/UserCredentials'; @@ -467,16 +467,7 @@ const FileTable = forwardRef((props, ref) => { id: 'model', cell: (info) => { const model = info.getValue(); - return ( - - {(model.includes('LLM_MODEL_CONFIG_') - ? 
capitalize(model.split('LLM_MODEL_CONFIG_').at(-1) as string) - : capitalize(model) - ) - .split('_') - .join(' ')} - - ); + return {capitalizeWithUnderscore(model)}; }, header: () => Model, footer: (info) => info.column.id, diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index d5e2e0f97..4b0ad5e0e 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -410,6 +410,7 @@ export const capitalizeWithPlus = (s: string) => { .map((s) => capitalize(s)) .join('+'); }; +export const capitalizeWithUnderscore = (s: string) => capitalize(s).split('_').join(' '); export const getDescriptionForChatMode = (mode: string): string => { switch (mode.toLowerCase()) { From dcb39757c8c4b62e3e24c000564786192db27099 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:45:03 +0000 Subject: [PATCH 179/292] Fix: Communitites Tab is displayed based communitites length --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 6 +++++- frontend/src/components/ChatBot/ChatModeToggle.tsx | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 7401f0fa5..7fb819a32 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -250,7 +250,11 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === chatModeLables.entity_vector ? Communities : <>} + {mode === chatModeLables.entity_vector && communities.length ? 
( + Communities + ) : ( + <> + )} Evaluation Metrics )} diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index cd7b4b796..d7ce7a3bf 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -47,9 +47,7 @@ export default function ChatModeToggle({ const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes - : AvailableModes?.filter( - (m) => !m.mode.includes(chatModeLables.global_vector) - ); + : AvailableModes?.filter((m) => !m.mode.includes(chatModeLables.global_vector)); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { From d3e53659a4977633e48d7e0f4f1ad9d02033c9cf Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 15 Oct 2024 16:55:11 +0530 Subject: [PATCH 180/292] added the conversation download button (#800) --- frontend/src/components/ChatBot/Chatbot.tsx | 37 +++++++++------------ frontend/src/components/Layout/SideNav.tsx | 33 ++++++++++++++++-- frontend/src/utils/Utils.ts | 12 +++++++ 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index bb50cd54f..9cc426d12 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -37,7 +37,7 @@ import { buttonCaptions, chatModeLables } from '../../utils/Constants'; import useSpeechSynthesis from '../../hooks/useSpeech'; import ButtonWithToolTip from '../UI/ButtonWithToolTip'; import FallBackDialog from '../UI/FallBackDialog'; -import { getDateTime } from '../../utils/Utils'; +import { downloadClickHandler, getDateTime } from '../../utils/Utils'; import ChatModesSwitch from './ChatModesSwitch'; import CommonActions from './CommonChatActions'; const InfoModal = lazy(() => 
import('./ChatInfoModal')); @@ -407,15 +407,6 @@ const Chatbot: FC = (props) => { } }, []); - const downloadClickHandler = useCallback(function downloadClickHandler(JsonData: Type) { - const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); - if (downloadLinkRef && downloadLinkRef.current) { - downloadLinkRef.current.href = URL.createObjectURL(textFile); - downloadLinkRef.current.download = 'graph-builder-chat-details.json'; - downloadLinkRef.current.click(); - } - }, []); - return (
    @@ -590,17 +581,21 @@ const Chatbot: FC = (props) => { clean disabled={metricsLoading || infoLoading} onClick={() => { - downloadClickHandler({ - chatResponse: activeChat, - chunks, - metricDetails, - communities, - responseTime, - entities: infoEntities, - nodes, - tokensUsed, - model, - }); + downloadClickHandler( + { + chatResponse: activeChat, + chunks, + metricDetails, + communities, + responseTime, + entities: infoEntities, + nodes, + tokensUsed, + model, + }, + downloadLinkRef, + 'graph-builder-chat-details.json' + ); }} > diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 12c815757..e09ac434a 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -1,5 +1,5 @@ -import React, { useEffect, useState } from 'react'; -import { Dialog, SideNavigation, Tip, useMediaQuery } from '@neo4j-ndl/react'; +import React, { useEffect, useRef, useState } from 'react'; +import { Dialog, SideNavigation, TextLink, Tip, useMediaQuery } from '@neo4j-ndl/react'; import { ArrowRightIconOutline, ArrowLeftIconOutline, @@ -7,12 +7,13 @@ import { ArrowsPointingOutIconOutline, ChatBubbleOvalLeftEllipsisIconOutline, CloudArrowUpIconSolid, + ArrowDownTrayIconOutline, } from '@neo4j-ndl/react/icons'; import { SideNavProps } from '../../types'; import Chatbot from '../ChatBot/Chatbot'; import { createPortal } from 'react-dom'; import { useMessageContext } from '../../context/UserMessages'; -import { getIsLoading } from '../../utils/Utils'; +import { downloadClickHandler, getIsLoading } from '../../utils/Utils'; import ExpandedChatButtonContainer from '../ChatBot/ExpandedChatButtonContainer'; import { APP_SOURCES, tooltips } from '../../utils/Constants'; import ChatModeToggle from '../ChatBot/ChatModeToggle'; @@ -46,6 +47,7 @@ const SideNav: React.FC = ({ const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); const { 
connectionStatus, isReadOnlyUser } = useCredentials(); + const downloadLinkRef = useRef(null); const date = new Date(); useEffect(() => { @@ -93,6 +95,7 @@ const SideNav: React.FC = ({ toggleDrawer(); } }; + return (
    @@ -189,6 +192,30 @@ const SideNav: React.FC = ({ } /> + + { + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + ); + }} + icon={ + <> + + + + + Download Conversation + + "" + + + + } + /> + {!isChatModalOpen && ( { diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 4b0ad5e0e..27645ac7e 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -497,3 +497,15 @@ export const graphTypeFromNodes = (allNodes: ExtendedNode[]) => { } return graphType; }; +export function downloadClickHandler( + JsonData: Type, + downloadLinkRef: React.RefObject, + filename: string +) { + const textFile = new Blob([JSON.stringify(JsonData)], { type: 'application/json' }); + if (downloadLinkRef && downloadLinkRef.current) { + downloadLinkRef.current.href = URL.createObjectURL(textFile); + downloadLinkRef.current.download = filename; + downloadLinkRef.current.click(); + } +} From 0ff8bd11a7df50d6fd918fef96d2b93a300d5dcc Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:56:09 +0000 Subject: [PATCH 181/292] model name correction --- backend/score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 22264b635..80eb3a7c2 100644 --- a/backend/score.py +++ b/backend/score.py @@ -276,7 +276,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database logging.info(f'Entity Embeddings created') if "enable_communities" in tasks: - model = "openai-gpt-4o" + model = "openai_gpt_4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(josn_obj) From 09ea0717bb28ded897191ff347574fb453cf9947 Mon Sep 17 00:00:00 2001 From: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:49:28 +0000 Subject: [PATCH 182/292] chatmode switch mode fix --- frontend/src/components/ChatBot/Chatbot.tsx | 61 ++++++++++----------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 9cc426d12..a1eeb35ef 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -269,15 +269,15 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId - ? { - ...msg, - modes: { - ...msg.modes, - [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, - }, - } - : msg) + (msg.id === chatbotMessageId + ? { + ...msg, + modes: { + ...msg.modes, + [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, + }, + } + : msg) ) ); } @@ -290,19 +290,19 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId - ? { - ...msg, - isLoading: false, - isTyping: false, - modes: { - [chatModes[0]]: { - message: 'An error occurred while processing your request.', - error: error.message, - }, - }, - } - : msg) + (msg.id === chatbotMessageId + ? { + ...msg, + isLoading: false, + isTyping: false, + modes: { + [chatModes[0]]: { + message: 'An error occurred while processing your request.', + error: error.message, + }, + }, + } + : msg) ) ); } @@ -391,7 +391,7 @@ const Chatbot: FC = (props) => { setMetricContext(currentMode.metric_contexts ?? ''); setMetricAnswer(currentMode.metric_answer ?? 
''); setActiveChat(chat); - if (previousActiveChat != null && chat.id != previousActiveChat?.id) { + if ((previousActiveChat != null && chat.id != previousActiveChat?.id) || (previousActiveChat != null && chat.currentMode != previousActiveChat.currentMode)) { setNodes([]); setChunks([]); setInfoEntities([]); @@ -450,14 +450,12 @@ const Chatbot: FC = (props) => {
    {chat.modes[chat.currentMode]?.message || ''} @@ -541,9 +539,8 @@ const Chatbot: FC = (props) => {
    Date: Fri, 18 Oct 2024 15:36:58 +0530 Subject: [PATCH 183/292] Add API payload GCP logging (#805) --- backend/score.py | 79 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/backend/score.py b/backend/score.py index 80eb3a7c2..1efd46f86 100644 --- a/backend/score.py +++ b/backend/score.py @@ -93,6 +93,10 @@ async def create_source_knowledge_graph_url( try: start = time.time() + payload_json_obj = {'api_name':'url_scan', 'db_url':uri, 'userName':userName, 'database':database, 'source_url':source_url, 'aws_access_key_id':aws_access_key_id, + 'model':model, 'gcs_bucket_name':gcs_bucket_name, 'gcs_bucket_folder':gcs_bucket_folder, 'source_type':source_type, + 'gcs_project_id':gcs_project_id, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") if source_url is not None: source = source_url else: @@ -174,6 +178,11 @@ async def extract_knowledge_graph_from_file( """ try: start_time = time.time() + payload_json_obj = {'api_name':'extract', 'db_url':uri, 'userName':userName, 'database':database, 'source_url':source_url, 'aws_access_key_id':aws_access_key_id, + 'model':model, 'gcs_bucket_name':gcs_bucket_name, 'gcs_bucket_folder':gcs_bucket_folder, 'source_type':source_type,'gcs_blob_filename':gcs_blob_filename, + 'file_name':file_name, 'gcs_project_id':gcs_project_id, 'wiki_query':wiki_query,'allowedNodes':allowedNodes,'allowedRelationship':allowedRelationship, + 'language':language ,'retry_condition':retry_condition,'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -238,6 +247,8 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None """ try: start = time.time() + payload_json_obj = {'api_name':'sources_list', 'db_url':uri, 
'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") decoded_password = decode_password(password) if " " in uri: uri = uri.replace(" ","+") @@ -257,6 +268,8 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None @app.post("/post_processing") async def post_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), tasks=Form(None)): try: + payload_json_obj = {'api_name':'post_processing', 'db_url':uri, 'userName':userName, 'database':database, 'tasks':tasks, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) @@ -279,8 +292,9 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database model = "openai_gpt_4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(josn_obj) logging.info(f'created communities') + + logger.log_struct(josn_obj) return create_api_response('Success', message='All tasks completed successfully') except Exception as e: @@ -298,6 +312,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"QA_RAG called at {datetime.now()}") qa_rag_start_time = time.time() try: + payload_json_obj = {'api_name':'chat_bot', 'db_url':uri, 'userName':userName, 'database':database, 'question':question,'document_names':document_names, + 'session_id':session_id, 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") if mode == "graph": graph = Neo4jGraph( url=uri,username=userName,password=password,database=database,sanitize = True, 
refresh_schema=True) else: @@ -311,7 +328,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{total_call_time:.2f}'} + json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id,'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{total_call_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) @@ -328,6 +345,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()): try: start = time.time() + payload_json_obj = {'api_name':'chunk_entities', 'db_url':uri, 'userName':userName, 'database':database, 'nodedetails':nodedetails,'entities':entities, + 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) end = time.time() elapsed_time = end - start @@ -352,7 +372,9 @@ async def graph_query( document_names: str = Form(None), ): try: - # print(document_names) + payload_json_obj = {'api_name':'graph_query', 'db_url':uri, 'userName':userName, 'database':database, 'document_names':document_names, + 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") start = time.time() result = await asyncio.to_thread( get_graph_results, @@ -380,6 +402,8 @@ async def graph_query( @app.post("/clear_chat_bot") async def 
clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=Form(), session_id=Form(None)): try: + payload_json_obj = {'api_name':'clear_chat_bot', 'db_url':uri, 'userName':userName, 'database':database, 'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(clear_chat_history,graph=graph,session_id=session_id) return create_api_response('Success',data=result) @@ -396,6 +420,8 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() + payload_json_obj = {'api_name':'connect', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) end = time.time() @@ -417,6 +443,9 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber password=Form(), database=Form()): try: start = time.time() + payload_json_obj = {'api_name':'upload', 'db_url':uri, 'userName':userName, 'database':database, 'chunkNumber':chunkNumber,'totalChunks':totalChunks, + 'original_file_name':originalname,'model':model, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) end = time.time() @@ -442,6 +471,8 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber async def 
get_structured_schema(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() + payload_json_obj = {'api_name':'schema', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(get_labels_and_relationtypes, graph) end = time.time() @@ -512,6 +543,9 @@ async def delete_document_and_entities(uri=Form(), deleteEntities=Form()): try: start = time.time() + payload_json_obj = {'api_name':'delete_document_and_entities', 'db_url':uri, 'userName':userName, 'database':database, 'filenames':filenames,'deleteEntities':deleteEntities, + 'source_types':source_types, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) @@ -568,6 +602,9 @@ async def get_document_status(file_name, url, userName, password, database): @app.post("/cancelled_job") async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=Form(), filenames=Form(None), source_types=Form(None)): try: + payload_json_obj = {'api_name':'cancelled_job', 'db_url':uri, 'userName':userName, 'database':database, + 'filenames':filenames,'source_types':source_types,'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = manually_cancelled_job(graph,filenames, source_types, MERGED_DIR, uri) @@ -584,6 +621,8 @@ async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=F 
@app.post("/populate_graph_schema") async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None)): try: + payload_json_obj = {'api_name':'populate_graph_schema', 'model':model, 'is_schema_description_checked':is_schema_description_checked, 'input_text':input_text, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") result = populate_graph_schema_from_text(input_text, model, is_schema_description_checked) return create_api_response('Success',data=result) except Exception as e: @@ -598,6 +637,8 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche @app.post("/get_unconnected_nodes_list") async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form()): try: + payload_json_obj = {'api_name':'get_unconnected_nodes_list', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -619,6 +660,9 @@ async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form( @app.post("/delete_unconnected_nodes") async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form()): try: + payload_json_obj = {'api_name':'delete_unconnected_nodes', 'db_url':uri, 'userName':userName, 'database':database, + 'unconnected_entities_list':unconnected_entities_list, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -641,6 +685,8 @@ async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), data 
async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() + payload_json_obj = {'api_name':'get_duplicate_nodes', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() @@ -662,6 +708,9 @@ async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), data async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),duplicate_nodes_list=Form()): try: start = time.time() + payload_json_obj = {'api_name':'merge_duplicate_nodes', 'db_url':uri, 'userName':userName, 'database':database, + 'duplicate_nodes_list':duplicate_nodes_list, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list) @@ -682,6 +731,9 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da @app.post("/drop_create_vector_index") async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): try: + payload_json_obj = {'api_name':'drop_create_vector_index', 'db_url':uri, 'userName':userName, 'database':database, + 'isVectorIndexExist':isVectorIndexExist, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.drop_create_vector_index(isVectorIndexExist) @@ -698,6 +750,9 @@ 
async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da @app.post("/retry_processing") async def retry_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), file_name=Form(), retry_condition=Form()): try: + payload_json_obj = {'api_name':'retry_processing', 'db_url':uri, 'userName':userName, 'database':database, 'file_name':file_name,'retry_condition':retry_condition, + 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) await asyncio.to_thread(set_status_retry, graph,file_name,retry_condition) #set_status_retry(graph,file_name,retry_condition) @@ -714,14 +769,16 @@ async def retry_processing(uri=Form(), userName=Form(), password=Form(), databas @app.post('/metric') async def calculate_metric(question=Form(), context=Form(), answer=Form(), model=Form()): try: - result = await asyncio.to_thread(get_ragas_metrics, question, context, answer, model) - if result is None or "error" in result: - return create_api_response( - 'Failed', - message='Failed to calculate evaluation metrics.', - error=result.get("error", "Ragas evaluation returned null") - ) - return create_api_response('Success', data=result) + payload_json_obj = {'api_name':'metric', 'context':context, 'answer':answer, 'model':model, 'logging_time': formatted_time(datetime.now(timezone.utc))} + logger.log_struct(payload_json_obj, "INFO") + result = await asyncio.to_thread(get_ragas_metrics, question, context, answer, model) + if result is None or "error" in result: + return create_api_response( + 'Failed', + message='Failed to calculate evaluation metrics.', + error=result.get("error", "Ragas evaluation returned null") + ) + return create_api_response('Success', data=result) except Exception as e: job_status = "Failed" message = "Error while calculating evaluation metrics" From a8f821afed802405f7997300b4e6622c968429a2 Mon Sep 17 00:00:00 
2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:54:11 +0530 Subject: [PATCH 184/292] Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Dcoumrnt node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> --- backend/score.py | 20 ++ backend/src/chunkid_entities.py | 13 +- backend/src/neighbours.py | 63 ++++++ backend/src/shared/constants.py | 16 +- .../src/components/ChatBot/ChatInfoModal.tsx | 13 +- .../components/ChatBot/ChatModesSwitch.tsx | 2 + frontend/src/components/ChatBot/ChunkInfo.tsx | 135 ++++++++++-- .../components/ChatBot/CommonChatActions.tsx | 3 + .../src/components/ChatBot/Communities.tsx | 41 ---- .../components/ChatBot/CommunitiesInfo.tsx | 45 +++- .../src/components/ChatBot/EntitiesInfo.tsx | 102 +++++++-- .../src/components/ChatBot/SourcesInfo.tsx | 23 +- frontend/src/components/ChatBot/chatInfo.ts | 40 ++++ .../src/components/Graph/GraphViewButton.tsx | 6 +- .../src/components/Graph/GraphViewModal.tsx | 12 +- .../Deduplication/index.tsx | 202 ++++++++++------- .../DeleteTabForOrphanNodes/index.tsx | 205 ++++++++++-------- .../EntityExtractionSetting.tsx | 39 +++- frontend/src/services/GraphQuery.ts | 24 +- frontend/src/types.ts | 32 +++ frontend/src/utils/Constants.ts | 2 + 21 files changed, 762 insertions(+), 276 deletions(-) create mode 100644 backend/src/neighbours.py delete mode 100644 frontend/src/components/ChatBot/Communities.tsx create mode 100644 frontend/src/components/ChatBot/chatInfo.ts diff --git 
a/backend/score.py b/backend/score.py index 1efd46f86..7940815b2 100644 --- a/backend/score.py +++ b/backend/score.py @@ -17,6 +17,7 @@ from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import create_communities +from src.neighbours import get_neighbour_nodes import json from typing import List, Mapping from starlette.middleware.sessions import SessionMiddleware @@ -363,6 +364,25 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=F finally: gc.collect() +@app.post("/get_neighbours") +async def get_neighbours(uri=Form(),userName=Form(), password=Form(), database=Form(), elementId=Form(None)): + try: + start = time.time() + result = await asyncio.to_thread(get_neighbour_nodes,uri=uri, username=userName, password=password,database=database, element_id=elementId) + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'get_neighbours','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") + return create_api_response('Success',data=result,message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + job_status = "Failed" + message="Unable to extract neighbour nodes for given element ID" + error_message = str(e) + logging.exception(f'Exception in get neighbours :{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + @app.post("/graph_query") async def graph_query( uri: str = Form(), diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 8bb9c2198..31ae07496 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -1,6 +1,7 @@ import logging from src.graph_query import * from src.shared.constants import * +import re def process_records(records): """ @@ -191,7 +192,11 @@ def 
get_entities_from_chunkids(uri, username, password, database ,nodedetails,en if "entitydetails" in nodedetails and nodedetails["entitydetails"]: entity_ids = [item["id"] for item in nodedetails["entitydetails"]] logging.info(f"chunkid_entities module: Starting for entity ids: {entity_ids}") - return process_entityids(driver, entity_ids) + result = process_entityids(driver, entity_ids) + if "chunk_data" in result.keys(): + for chunk in result["chunk_data"]: + chunk["text"] = re.sub(r'\s+', ' ', chunk["text"]) + return result else: logging.info("chunkid_entities module: No entity ids are passed") return default_response @@ -201,7 +206,11 @@ def get_entities_from_chunkids(uri, username, password, database ,nodedetails,en if "chunkdetails" in nodedetails and nodedetails["chunkdetails"]: chunk_ids = [item["id"] for item in nodedetails["chunkdetails"]] logging.info(f"chunkid_entities module: Starting for chunk ids: {chunk_ids}") - return process_chunkids(driver, chunk_ids, entities) + result = process_chunkids(driver, chunk_ids, entities) + if "chunk_data" in result.keys(): + for chunk in result["chunk_data"]: + chunk["text"] = re.sub(r'\s+', ' ', chunk["text"]) + return result else: logging.info("chunkid_entities module: No chunk ids are passed") return default_response diff --git a/backend/src/neighbours.py b/backend/src/neighbours.py new file mode 100644 index 000000000..08022ecc6 --- /dev/null +++ b/backend/src/neighbours.py @@ -0,0 +1,63 @@ +import logging +from src.graph_query import * + +NEIGHBOURS_FROM_ELEMENT_ID_QUERY = """ +MATCH (n) +WHERE elementId(n) = $element_id + +MATCH (n)<-[rels]->(m) +WITH n, + ([n] + COLLECT(DISTINCT m)) AS allNodes, + COLLECT(DISTINCT rels) AS allRels + +RETURN + [node IN allNodes | + node { + .*, + embedding: null, + text: null, + summary: null, + labels: [coalesce(apoc.coll.removeAll(labels(node), ['__Entity__'])[0], "*")], + element_id: elementId(node), + properties: { + id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE 
node.fileName END + } + } + ] AS nodes, + + [r IN allRels | + { + start_node_element_id: elementId(startNode(r)), + end_node_element_id: elementId(endNode(r)), + type: type(r), + element_id: elementId(r) + } + ] AS relationships +""" + + +def get_neighbour_nodes(uri, username, password, database, element_id, query=NEIGHBOURS_FROM_ELEMENT_ID_QUERY): + driver = None + + try: + logging.info(f"Querying neighbours for element_id: {element_id}") + driver = get_graphDB_driver(uri, username, password, database) + driver.verify_connectivity() + logging.info("Database connectivity verified.") + + records, summary, keys = driver.execute_query(query,element_id=element_id) + nodes = records[0].get("nodes", []) + relationships = records[0].get("relationships", []) + result = {"nodes": nodes, "relationships": relationships} + + logging.info(f"Successfully retrieved neighbours for element_id: {element_id}") + return result + + except Exception as e: + logging.error(f"Error retrieving neighbours for element_id: {element_id}: {e}") + return {"nodes": [], "relationships": []} + + finally: + if driver is not None: + driver.close() + logging.info("Database driver closed.") \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 33b27b20a..cde354f16 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -121,7 +121,7 @@ RETURN d AS doc, [chunk IN chunks | - chunk {.*, embedding: null} + chunk {.*, embedding: null, element_id: elementId(chunk)} ] AS chunks, [ node IN nodes | @@ -168,10 +168,10 @@ CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 CHAT_TOKEN_CUT_OFF = { - ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro", "gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, - ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, + 
('openai_gpt_3.5','azure_ai_gpt_35',"gemini_1.0_pro","gemini_1.5_pro", "gemini_1.5_flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, + ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai_gpt_4o", "openai_gpt_4o_mini") : 28, ("ollama_llama3") : 2 -} +} ### CHAT TEMPLATES CHAT_SYSTEM_TEMPLATE = """ @@ -473,14 +473,16 @@ .*, embedding: null, fileName: d.fileName, - fileSource: d.fileSource + fileSource: d.fileSource, + element_id: elementId(c) } ] AS chunks, [ community IN communities WHERE community IS NOT NULL | community { .*, - embedding: null + embedding: null, + element_id:elementId(community) } ] AS communities, [ @@ -551,7 +553,7 @@ WHERE elementId(community) IN $communityids WITH collect(distinct community) AS communities RETURN [community IN communities | - community {.*, embedding: null, elementid: elementId(community)}] AS communities + community {.*, embedding: null, element_id: elementId(community)}] AS communities """ ## CHAT MODES diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 7fb819a32..266bf98ac 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -23,7 +23,7 @@ import { tokens } from '@neo4j-ndl/base'; import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; -import CommunitiesInfo from './Communities'; +import CommunitiesInfo from './CommunitiesInfo'; import { chatModeLables, supportedLLmsForRagas } from '../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; @@ -74,6 +74,8 @@ const ChatInfoModal: React.FC = ({ const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); + console.log('node', nodeDetails); + const 
actions: CypherCodeBlockProps['actions'] = useMemo( () => [ { @@ -348,9 +350,14 @@ const ChatInfoModal: React.FC = ({ <> )} - {activeTab == 4 && nodes?.length && relationships?.length ? ( + {activeTab == 4 && nodes?.length && relationships?.length && mode !== chatModeLables.graph ? ( - + ) : ( <> diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index 5bde10d26..a958b1a06 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -24,6 +24,7 @@ export default function ChatModesSwitch({ size='small' clean onClick={() => switchToOtherMode(currentModeIndex - 1)} + aria-label='left' > @@ -39,6 +40,7 @@ export default function ChatModesSwitch({ size='small' clean onClick={() => switchToOtherMode(currentModeIndex + 1)} + aria-label='right' > diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 05fa229a1..7f31cd4d5 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -1,7 +1,7 @@ -import { FC, useContext } from 'react'; -import { ChunkProps } from '../../types'; +import { FC, useContext, useState } from 'react'; +import { ChunkProps, UserCredentials } from '../../types'; import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; -import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; +import { DocumentTextIconOutline, GlobeAltIconOutline, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; import wikipedialogo from '../../assets/images/wikipedia.svg'; import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; @@ -10,9 +10,32 @@ import ReactMarkdown from 'react-markdown'; import { generateYouTubeLink, getLogo, isAllowedHost } from '../../utils/Utils'; import { ThemeWrapperContext } from 
'../../context/ThemeWrapper'; import { chatModeLables } from '../../utils/Constants'; +import { useCredentials } from '../../context/UserCredentials'; +import GraphViewModal from '../Graph/GraphViewModal'; +import { handleGraphNodeClick } from './chatInfo'; +import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; const ChunkInfo: FC = ({ loading, chunks, mode }) => { const themeUtils = useContext(ThemeWrapperContext); + const { userCredentials } = useCredentials(); + const [neoNodes, setNeoNodes] = useState([]); + const [neoRels, setNeoRels] = useState([]); + const [openGraphView, setOpenGraphView] = useState(false); + const [viewPoint, setViewPoint] = useState(''); + const [loadingGraphView, setLoadingGraphView] = useState(false); + + const handleChunkClick = async (elementId: string, viewMode: string) => { + handleGraphNodeClick( + userCredentials as UserCredentials, + elementId, + viewMode, + setNeoNodes, + setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView + ); + }; return ( <> @@ -28,23 +51,49 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.page_number ? ( <>
    - - - {chunk?.fileName} - + <> + handleChunkClick(chunk.element_id, 'Chunk')} + > + + + + + {chunk?.fileName} + +
    {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && - mode !== chatModeLables.graph && ( + mode !== chatModeLables.graph && + chunk.score && ( Similarity Score: {chunk?.score} )} +
    + Page: {chunk?.page_number} +
    ) : chunk?.url && chunk?.start_time ? ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + = ({ loading, chunks, mode }) => { ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + {chunk?.fileName}
    @@ -76,6 +135,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + {chunk?.fileName}
    @@ -88,6 +157,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + {chunk?.fileName}
    @@ -102,6 +181,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { !isAllowedHost(chunk?.url, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) ? ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + {chunk?.url} @@ -116,6 +205,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ) : ( <>
    + handleChunkClick(chunk.element_id, 'Chunk')} + > + + {chunk.fileSource === 'local file' ? ( ) : ( @@ -135,7 +234,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
    )} - {chunk?.text} +
    + {chunk?.text} +
    ))} @@ -143,8 +244,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ) : ( No Chunks Found )} + {openGraphView && ( + + )} ); }; - export default ChunkInfo; diff --git a/frontend/src/components/ChatBot/CommonChatActions.tsx b/frontend/src/components/ChatBot/CommonChatActions.tsx index 80d904f6a..d1a5c6243 100644 --- a/frontend/src/components/ChatBot/CommonChatActions.tsx +++ b/frontend/src/components/ChatBot/CommonChatActions.tsx @@ -30,6 +30,7 @@ export default function CommonActions({ label='Retrieval Information' disabled={chat.isTyping || chat.isLoading} onClick={() => detailsHandler(chat, activeChat)} + aria-label='Retrieval Information' > {buttonCaptions.details} @@ -40,6 +41,7 @@ export default function CommonActions({ text={chat.copying ? tooltips.copied : tooltips.copy} onClick={() => copyHandler(chat.modes[chat.currentMode]?.message, chat.id)} disabled={chat.isTyping || chat.isLoading} + aria-label='copy text' > @@ -50,6 +52,7 @@ export default function CommonActions({ text={chat.speaking ? tooltips.stopSpeaking : tooltips.textTospeech} disabled={listMessages.some((msg) => msg.speaking && msg.id !== chat.id)} label={chat.speaking ? 'stop speaking' : 'text to speech'} + aria-label='speech' > {chat.speaking ? : } diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx deleted file mode 100644 index 11869d3d4..000000000 --- a/frontend/src/components/ChatBot/Communities.tsx +++ /dev/null @@ -1,41 +0,0 @@ -import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; -import { FC } from 'react'; -import ReactMarkdown from 'react-markdown'; -import { CommunitiesProps } from '../../types'; - -const CommunitiesInfo: FC = ({ loading, communities }) => { - console.log('communities', communities); - return ( - <> - {loading ? ( - - - - ) : communities?.length > 0 ? ( -
    -
      - {communities.map((community, index) => ( -
    • -
      - - ID : - {community.id} - - - Score : - {community.score} - - {community.summary} -
      -
    • - ))} -
    -
    - ) : ( - No Communities Found - )} - - ); -}; - -export default CommunitiesInfo; diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 088e4e370..f562a4ed4 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -1,10 +1,33 @@ -import { Box, LoadingSpinner, Flex, Typography } from '@neo4j-ndl/react'; -import { FC } from 'react'; +import { Box, LoadingSpinner, Flex, Typography, TextLink } from '@neo4j-ndl/react'; +import { FC, useState } from 'react'; import ReactMarkdown from 'react-markdown'; -import { CommunitiesProps } from '../../types'; +import { CommunitiesProps, UserCredentials } from '../../types'; import { chatModeLables } from '../../utils/Constants'; +import { useCredentials } from '../../context/UserCredentials'; +import GraphViewModal from '../Graph/GraphViewModal'; +import { handleGraphNodeClick } from './chatInfo'; const CommunitiesInfo: FC = ({ loading, communities, mode }) => { + const { userCredentials } = useCredentials(); + const [neoNodes, setNeoNodes] = useState([]); + const [neoRels, setNeoRels] = useState([]); + const [openGraphView, setOpenGraphView] = useState(false); + const [viewPoint, setViewPoint] = useState(''); + const [loadingGraphView, setLoadingGraphView] = useState(false); + + const handleCommunityClick = (elementId: string, viewMode: string) => { + handleGraphNodeClick( + userCredentials as UserCredentials, + elementId, + viewMode, + setNeoNodes, + setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView + ); + }; + return ( <> {loading ? ( @@ -18,8 +41,11 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) =
  • - ID : - {community.id} + handleCommunityClick(community.element_id, 'chatInfoView')} + >{`ID : ${community.id}`} {mode === chatModeLables.global_vector && community.score && ( @@ -36,6 +62,15 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ) : ( No Communities Found )} + {openGraphView && ( + + )} ); }; diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 6c6e0784e..2b1ff3cca 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -1,11 +1,21 @@ -import { Box, GraphLabel, LoadingSpinner, Typography } from '@neo4j-ndl/react'; -import { FC, useMemo } from 'react'; -import { EntitiesProps, GroupedEntity } from '../../types'; +import { Box, GraphLabel, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { FC, useMemo, useState } from 'react'; +import { EntitiesProps, GroupedEntity, UserCredentials } from '../../types'; import { calcWordColor } from '@neo4j-devtools/word-color'; import { graphLabels } from '../../utils/Constants'; import { parseEntity } from '../../utils/Utils'; +import { useCredentials } from '../../context/UserCredentials'; +import GraphViewModal from '../Graph/GraphViewModal'; +import { handleGraphNodeClick } from './chatInfo'; const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, infoEntities }) => { + const { userCredentials } = useCredentials(); + const [neoNodes, setNeoNodes] = useState([]); + const [neoRels, setNeoRels] = useState([]); + const [openGraphView, setOpenGraphView] = useState(false); + const [viewPoint, setViewPoint] = useState(''); + const [loadingGraphView, setLoadingGraphView] = useState(false); + const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { const items = infoEntities.reduce((acc, entity) => { const { label, text } = parseEntity(entity); @@ -18,7 +28,6 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in }, 
{} as Record; color: string }>); return items; }, [infoEntities]); - const labelCounts = useMemo(() => { const counts: { [label: string]: number } = {}; for (let index = 0; index < infoEntities?.length; index++) { @@ -33,26 +42,55 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in const sortedLabels = useMemo(() => { return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); }, [labelCounts]); + + const handleEntityClick = async (elementId: string, viewMode: string) => { + handleGraphNodeClick( + userCredentials as UserCredentials, + elementId, + viewMode, + setNeoNodes, + setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView + ); + }; + return ( <> {loading ? ( - ) : Object.keys(groupedEntities)?.length > 0 || Object.keys(graphonly_entities)?.length > 0 ? ( + ) : (mode !== 'graph' && Object.keys(groupedEntities)?.length > 0) || + (mode == 'graph' && Object.keys(graphonly_entities)?.length > 0) ? (
      {mode == 'graph' ? graphonly_entities.map((label, index) => (
    • -
      - { - // @ts-ignore - label[Object.keys(label)[0]].id ?? Object.keys(label)[0] - } -
      +
        + {Object.keys(label).map((key) => ( +
      • + + {key} + + + { + // @ts-ignore + label[key].id ?? label[key] + } + +
      • + ))} +
    • )) : sortedLabels.map((label, index) => { @@ -62,20 +100,32 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in key={index} className='flex items-center mb-2 text-ellipsis whitespace-nowrap max-w-[100%)] overflow-hidden' > - e.preventDefault()} - > + {label === '__Community__' ? graphLabels.community : label} ({labelCounts[label]}) - {Array.from(entity.texts).slice(0, 3).join(', ')} + {Array.from(entity.texts) + .slice(0, 3) + .map((text, idx) => { + const matchingEntity = infoEntities.find( + (e) => e.labels.includes(label) && parseEntity(e).text === text + ); + const textId = matchingEntity?.element_id; + return ( + + handleEntityClick(textId!, 'chatInfoView')} + className={loadingGraphView ? 'cursor-wait' : 'cursor-pointer'} + > + {text} + + {Array.from(entity.texts).length > 1 ? ',' : ''} + + ); + })} ); @@ -84,8 +134,16 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ) : ( No Entities Found )} + {openGraphView && ( + + )} ); }; - export default EntitiesInfo; diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index 91fee511f..79d710fa2 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -1,5 +1,5 @@ import { FC, useContext } from 'react'; -import { SourcesProps } from '../../types'; +import { Chunk, SourcesProps } from '../../types'; import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; import { getLogo, isAllowedHost, youtubeLinkValidation } from '../../utils/Utils'; @@ -10,17 +10,31 @@ import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; import s3logo from '../../assets/images/s3logo.png'; +const filterUniqueChunks = (chunks: Chunk[]) => { + const chunkSource = new Set(); + return chunks.filter(chunk => { + const sourceCheck = 
`${chunk.fileName}-${chunk.fileSource}`; + if (chunkSource.has(sourceCheck)) { + return false; + } else { + chunkSource.add(sourceCheck); + return true; + } + }); +}; + const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { const themeUtils = useContext(ThemeWrapperContext); + const uniqueChunks = chunks ? filterUniqueChunks(chunks) : []; return ( <> {loading ? ( - ) : mode === 'entity search+vector' && chunks?.length ? ( + ) : mode === 'entity search+vector' && uniqueChunks.length ? (
        - {chunks + {uniqueChunks .map((c) => ({ fileName: c.fileName, fileSource: c.fileSource })) .map((s, index) => { return ( @@ -133,5 +147,4 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { ); }; - -export default SourcesInfo; +export default SourcesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts new file mode 100644 index 000000000..821cf69f8 --- /dev/null +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -0,0 +1,40 @@ +import { getNeighbors } from '../../services/GraphQuery'; +import { NeoNode, NeoRelationship, UserCredentials } from '../../types'; + +export const handleGraphNodeClick = async ( + userCredentials: UserCredentials, + elementId: string, + viewMode: string, + setNeoNodes: React.Dispatch>, + setNeoRels: React.Dispatch>, + setOpenGraphView: React.Dispatch>, + setViewPoint: React.Dispatch>, + setLoadingGraphView?: React.Dispatch> +) => { + if (setLoadingGraphView) { + setLoadingGraphView(true); + } + try { + const result = await getNeighbors(userCredentials, elementId); + if (result && result.data.data.nodes.length > 0) { + let { nodes } = result.data.data; + if (viewMode === 'Chunk') { + nodes = nodes.filter((node: NeoNode) => node.labels.length === 1 && node.properties.id !== null); + } + const nodeIds = new Set(nodes.map((node: NeoNode) => node.element_id)); + const relationships = result.data.data.relationships.filter( + (rel: NeoRelationship) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id) + ); + setNeoNodes(nodes); + setNeoRels(relationships); + setOpenGraphView(true); + setViewPoint('chatInfoView'); + } + } catch (error: any) { + console.error('Error fetching neighbors:', error); + } finally { + if (setLoadingGraphView) { + setLoadingGraphView(false); + } + } +}; diff --git a/frontend/src/components/Graph/GraphViewButton.tsx b/frontend/src/components/Graph/GraphViewButton.tsx index 
50eb13972..8f051ce0b 100644 --- a/frontend/src/components/Graph/GraphViewButton.tsx +++ b/frontend/src/components/Graph/GraphViewButton.tsx @@ -3,7 +3,7 @@ import { Button } from '@neo4j-ndl/react'; import GraphViewModal from './GraphViewModal'; import { GraphViewButtonProps } from '../../types'; -const GraphViewButton: React.FC = ({ nodeValues, relationshipValues }) => { +const GraphViewButton: React.FC = ({ nodeValues, relationshipValues, fill, label }) => { const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); @@ -13,7 +13,9 @@ const GraphViewButton: React.FC = ({ nodeValues, relations }; return ( <> - + = ({ if (open) { setLoading(true); setGraphType([]); - if (viewPoint !== 'chatInfoView') { + if (viewPoint !== graphLabels.chatInfoView) { graphApi(); } else { const { finalNodes, finalRels, schemeVal } = processGraphData(nodeValues ?? [], relationshipValues ?? []); @@ -176,7 +176,9 @@ const GraphViewModal: React.FunctionComponent = ({ }, [open]); useEffect(() => { - handleSearch(debouncedQuery); + if (debouncedQuery) { + handleSearch(debouncedQuery); + } }, [debouncedQuery]); const initGraph = ( @@ -244,7 +246,7 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(updatedNodes); setRelationships(updatedRelationships); }, - [nodes] + [nodes, relationships] ); // Unmounting the component @@ -377,7 +379,7 @@ const GraphViewModal: React.FunctionComponent = ({
        - ) : graphType.length === 0 ? ( + ) : graphType.length === 0 && checkBoxView ? (
        diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index f5a021e30..05facb141 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -12,13 +12,24 @@ import { Row, getSortedRowModel, } from '@tanstack/react-table'; -import { Checkbox, DataGrid, DataGridComponents, Flex, Tag, Typography, useMediaQuery } from '@neo4j-ndl/react'; +import { + Checkbox, + DataGrid, + DataGridComponents, + Flex, + Tag, + TextLink, + Typography, + useMediaQuery, +} from '@neo4j-ndl/react'; import Legend from '../../../UI/Legend'; import { DocumentIconOutline } from '@neo4j-ndl/react/icons'; import { calcWordColor } from '@neo4j-devtools/word-color'; import ButtonWithToolTip from '../../../UI/ButtonWithToolTip'; import mergeDuplicateNodes from '../../../../services/MergeDuplicateEntities'; import { tokens } from '@neo4j-ndl/base'; +import GraphViewModal from '../../../Graph/GraphViewModal'; +import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; export default function DeduplicationTab() { const { breakpoints } = tokens; @@ -30,6 +41,11 @@ export default function DeduplicationTab() { const [isLoading, setLoading] = useState(false); const [mergeAPIloading, setmergeAPIloading] = useState(false); const tableRef = useRef(null); + const [neoNodes, setNeoNodes] = useState([]); + const [neoRels, setNeoRels] = useState([]); + const [openGraphView, setOpenGraphView] = useState(false); + const [viewPoint, setViewPoint] = useState(''); + const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -89,6 +105,19 @@ export default function DeduplicationTab() { ); }); }; + + const handleDuplicateNodeClick = async (elementId: string, viewMode: string) => { + handleGraphNodeClick( + userCredentials as UserCredentials, + 
elementId, + viewMode, + setNeoNodes, + setNeoRels, + setOpenGraphView, + setViewPoint + ); + }; + const columns = useMemo( () => [ { @@ -121,7 +150,13 @@ export default function DeduplicationTab() { cell: (info) => { return (
        - {info.getValue()} + handleDuplicateNodeClick(info.row.id, 'chatInfoView')} + title={info.getValue()} + > + {info.getValue()} +
        ); }, @@ -225,83 +260,94 @@ export default function DeduplicationTab() { ? `Merge Duplicate Nodes (${table.getSelectedRowModel().rows.length})` : 'Select Node(s) to Merge'; return ( -
        - - - - Refine Your Knowledge Graph: Merge Duplicate Entities: - - - Identify and merge similar entries like "Apple" and "Apple Inc." to eliminate redundancy and improve the - accuracy and clarity of your knowledge graph. - + <> +
        + + + + Refine Your Knowledge Graph: Merge Duplicate Entities: + + + Identify and merge similar entries like "Apple" and "Apple Inc." to eliminate redundancy and improve the + accuracy and clarity of your knowledge graph. + + + {duplicateNodes.length > 0 && ( + + Total Duplicate Nodes: {duplicateNodes.length} + + )} - {duplicateNodes.length > 0 && ( - - Total Duplicate Nodes: {duplicateNodes.length} - - )} - - , - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - - ); - }, - }} - /> - - { - await clickHandler(); - await fetchDuplicateNodes(); + - {selectedFilesCheck} - - -
        + rootProps={{ + className: 'max-h-[355px] !overflow-y-auto', + }} + isLoading={isLoading} + components={{ + Body: (props) => , + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + /> + + { + await clickHandler(); + await fetchDuplicateNodes(); + }} + size='large' + loading={mergeAPIloading} + text={ + isLoading + ? 'Fetching Duplicate Nodes' + : !isLoading && !duplicateNodes.length + ? 'No Nodes Found' + : !table.getSelectedRowModel().rows.length + ? 'No Nodes Selected' + : mergeAPIloading + ? 'Merging' + : `Merge Selected Nodes (${table.getSelectedRowModel().rows.length})` + } + label='Merge Duplicate Node Button' + disabled={!table.getSelectedRowModel().rows.length} + placement='top' + > + {selectedFilesCheck} + + +
        + {openGraphView && ( + + )} + ); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 24de576c5..bcc2597f1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -1,4 +1,4 @@ -import { Checkbox, DataGrid, DataGridComponents, Flex, Typography, useMediaQuery } from '@neo4j-ndl/react'; +import { Checkbox, DataGrid, DataGridComponents, Flex, TextLink, Typography, useMediaQuery } from '@neo4j-ndl/react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { UserCredentials, orphanNodeProps } from '../../../../types'; import { getOrphanNodes } from '../../../../services/GetOrphanNodes'; @@ -19,6 +19,8 @@ import { } from '@tanstack/react-table'; import DeletePopUp from '../../DeletePopUp/DeletePopUp'; import { tokens } from '@neo4j-ndl/base'; +import GraphViewModal from '../../../Graph/GraphViewModal'; +import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; export default function DeletePopUpForOrphanNodes({ deleteHandler, loading, @@ -35,6 +37,10 @@ export default function DeletePopUpForOrphanNodes({ const [rowSelection, setRowSelection] = useState>({}); const tableRef = useRef(null); const [showDeletePopUp, setshowDeletePopUp] = useState(false); + const [neoNodes, setNeoNodes] = useState([]); + const [neoRels, setNeoRels] = useState([]); + const [openGraphView, setOpenGraphView] = useState(false); + const [viewPoint, setViewPoint] = useState(''); const fetchOrphanNodes = useCallback(async () => { try { @@ -66,6 +72,18 @@ export default function DeletePopUpForOrphanNodes({ }, [userCredentials]); const columnHelper = createColumnHelper(); + const handleOrphanNodeClick = (elementId: string, viewMode: string) => { + handleGraphNodeClick( + 
userCredentials as UserCredentials, + elementId, + viewMode, + setNeoNodes, + setNeoRels, + setOpenGraphView, + setViewPoint + ); + }; + const columns = useMemo( () => [ { @@ -98,7 +116,13 @@ export default function DeletePopUpForOrphanNodes({ cell: (info) => { return (
        - {info.getValue()} + handleOrphanNodeClick(info.row.id, 'chatInfoView')} + title={info.getValue()} + > + {info.getValue()} +
        ); }, @@ -190,94 +214,105 @@ export default function DeletePopUpForOrphanNodes({ }; return ( -
        - {showDeletePopUp && ( - setshowDeletePopUp(false)} - loading={loading} - view='settingsView' - /> - )} + <>
        - - - - Orphan Nodes Deletion (100 nodes per batch) - - {totalOrphanNodes > 0 && ( + {showDeletePopUp && ( + setshowDeletePopUp(false)} + loading={loading} + view='settingsView' + /> + )} +
        + + - Total Nodes: {totalOrphanNodes} + Orphan Nodes Deletion (100 nodes per batch) - )} - - - - This feature helps improve the accuracy of your knowledge graph by identifying and removing entities that - are not connected to any other information. These "lonely" entities can be remnants of past analyses or - errors in data processing. By removing them, we can create a cleaner and more efficient knowledge graph - that leads to more relevant and informative responses. - + {totalOrphanNodes > 0 && ( + + Total Nodes: {totalOrphanNodes} + + )} + + + + This feature helps improve the accuracy of your knowledge graph by identifying and removing entities + that are not connected to any other information. These "lonely" entities can be remnants of past + analyses or errors in data processing. By removing them, we can create a cleaner and more efficient + knowledge graph that leads to more relevant and informative responses. + + +
        + , + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + /> + + setshowDeletePopUp(true)} + size='large' + loading={loading} + text={ + isLoading + ? 'Fetching Orphan Nodes' + : !isLoading && !orphanNodes.length + ? 'No Nodes Found' + : !table.getSelectedRowModel().rows.length + ? 'No Nodes Selected' + : `Delete Selected Nodes (${table.getSelectedRowModel().rows.length})` + } + label='Orphan Node deletion button' + disabled={!table.getSelectedRowModel().rows.length} + placement='top' + > + {selectedFilesCheck} +
        - , - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - - ); - }, - }} - /> - - setshowDeletePopUp(true)} - size='large' - loading={loading} - text={ - isLoading - ? 'Fetching Orphan Nodes' - : !isLoading && !orphanNodes.length - ? 'No Nodes Found' - : !table.getSelectedRowModel().rows.length - ? 'No Nodes Selected' - : `Delete Selected Nodes (${table.getSelectedRowModel().rows.length})` - } - label='Orphan Node deletion button' - disabled={!table.getSelectedRowModel().rows.length} - placement='top' - > - {selectedFilesCheck} - - -
        + {openGraphView && ( + + )} + ); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 8a7362d26..e5e0bc033 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -244,6 +244,26 @@ export default function EntityExtractionSetting({ onClose(); } }; + const handleApply = () => { + setIsSchema(true); + localStorage.setItem('isSchema', JSON.stringify(true)); + showNormalToast(`Successfully Applied the Schema settings`); + if (view === 'Dialog' && onClose != undefined) { + onClose(); + } + localStorage.setItem( + 'selectedNodeLabels', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: selectedNodes }) + ); + localStorage.setItem( + 'selectedRelationshipLabels', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: selectedRels }) + ); + localStorage.setItem( + 'selectedSchemas', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: selectedSchemas }) + ); + }; // Load selectedSchemas from local storage on mount useEffect(() => { @@ -297,9 +317,8 @@ export default function EntityExtractionSetting({ options: nodeLabelOptions, onChange: onChangenodes, value: selectedNodes, - classNamePrefix: `${ - isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' - }`, + classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' + }`, }} type='creatable' /> @@ -313,9 +332,8 @@ export default function EntityExtractionSetting({ options: relationshipTypeOptions, onChange: onChangerels, value: selectedRels, - classNamePrefix: `${ - isTablet ? 
'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' - }`, + classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' + }`, }} type='creatable' /> @@ -372,6 +390,15 @@ export default function EntityExtractionSetting({ {buttonCaptions.clearSettings} )} + + {buttonCaptions.applyGraphSchema} +
    diff --git a/frontend/src/services/GraphQuery.ts b/frontend/src/services/GraphQuery.ts index f792f4aec..d9277a504 100644 --- a/frontend/src/services/GraphQuery.ts +++ b/frontend/src/services/GraphQuery.ts @@ -1,7 +1,7 @@ import { UserCredentials } from '../types'; import api from '../API/Index'; -const graphQueryAPI = async ( +export const graphQueryAPI = async ( userCredentials: UserCredentials, query_type: string, document_names: (string | undefined)[] | undefined @@ -26,4 +26,24 @@ const graphQueryAPI = async ( throw error; } }; -export default graphQueryAPI; + +export const getNeighbors = async (userCredentials: UserCredentials, elementId: string) => { + try { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + formData.append('elementId', elementId); + + const response = await api.post(`/get_neighbours`, formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); + return response; + } catch (error) { + console.log('Error Posting the Question:', error); + throw error; + } +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 0f8f913d3..6e33515d9 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -517,6 +517,7 @@ export type Community = { level: number; community_rank: number; score?: number; + element_id: string; }; export type GroupedEntity = { texts: Set; @@ -556,6 +557,7 @@ export interface Chunk { fileSource: string; score?: string; fileType: string; + element_id: string; } export interface SpeechSynthesisProps { @@ -610,6 +612,18 @@ export interface ExtendedNode extends Node { }; } +export interface NeoNode { + element_id: string; + labels: string[]; + properties: Record; +} +export interface NeoRelationship { + element_id: string; + start_node_element_id: string; + 
end_node_element_id: string; + type: string; +} + export interface ExtendedRelationship extends Relationship { count?: number; } @@ -657,6 +671,9 @@ export interface S3File { export interface GraphViewButtonProps { nodeValues?: ExtendedNode[]; relationshipValues?: ExtendedRelationship[]; + fill?: 'text' | 'filled' | 'outlined'; + label: string; + viewType: string; } export interface DrawerChatbotProps { isExpanded: boolean; @@ -712,6 +729,9 @@ export type CommunitiesProps = { loading: boolean; communities: Community[]; mode: string; + + // nodeValues: ExtendedNode[]; + // relationshipValues: ExtendedRelationship[]; }; export interface entity { @@ -804,3 +824,15 @@ export type GraphPropertiesPanelProps = { inspectedItem: BasicNode | BasicRelationship; newScheme: Scheme; }; + +export interface GraphViewHandlerProps { + nodeValues?: ExtendedNode[]; + relationshipValues?: ExtendedRelationship[]; + fill?: 'text' | 'filled' | 'outlined'; + label?: string; + viewType?: string; + buttonLabel: string; + graphonly_entities?: []; + entityInfo?: Entity[]; + mode?: string; +} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 971227724..bd5890c91 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -112,6 +112,7 @@ export const tooltips = { clearChat: 'Clear Chat History', continue: 'Continue', clearGraphSettings: 'Clear configured Graph Schema', + applySettings:'Apply Graph Schema' }; export const buttonCaptions = { @@ -135,6 +136,7 @@ export const buttonCaptions = { continueSettings: 'Continue', clearSettings: 'Clear Schema', ask: 'Ask', + applyGraphSchema:'Apply' }; export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ From 6c6da261770a5603150313990a158d57f208eb92 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:56:04 +0530 Subject: [PATCH 185/292] added error message for doc retriver (#807) --- 
backend/score.py | 3 +-- backend/src/QA_integration.py | 5 +++-- backend/src/communities.py | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/backend/score.py b/backend/score.py index 7940815b2..130cd1af2 100644 --- a/backend/score.py +++ b/backend/score.py @@ -290,8 +290,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database logging.info(f'Entity Embeddings created') if "enable_communities" in tasks: - model = "openai_gpt_4o" - await asyncio.to_thread(create_communities, uri, userName, password, database,model) + await asyncio.to_thread(create_communities, uri, userName, password, database) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'created communities') diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index cf7c74f6d..27849fc20 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -276,8 +276,9 @@ def retrieve_documents(doc_retriever, messages): logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") except Exception as e: - logging.error(f"Error retrieving documents: {e}") - raise + error_message = f"Error retrieving documents: {str(e)}" + logging.error(error_message) + raise RuntimeError(error_message) return docs,transformed_question diff --git a/backend/src/communities.py b/backend/src/communities.py index 1b19c689b..d1130150c 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -13,7 +13,8 @@ NODE_PROJECTION_ENTITY = "__Entity__" MAX_WORKERS = 10 MAX_COMMUNITY_LEVELS = 3 -MIN_COMMUNITY_SIZE = 1 +MIN_COMMUNITY_SIZE = 1 +COMMUNITY_CREATION_DEFAULT_MODEL = "openai_gpt_4o" CREATE_COMMUNITY_GRAPH_PROJECTION = """ @@ -466,7 +467,7 @@ def clear_communities(gds): raise -def create_communities(uri, username, password, database,model): +def create_communities(uri, username, password, 
database,model=COMMUNITY_CREATION_DEFAULT_MODEL): try: gds = get_gds_driver(uri, username, password, database) clear_communities(gds) From 3d587f0bab625fcfe53e0621d626a23f3ac6804a Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:04:59 +0530 Subject: [PATCH 186/292] copy row (#803) * copy row * column for copy * column copy --- frontend/src/components/FileTable.tsx | 63 ++++++++++++--------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index e5aa5ada4..ad0f7bc19 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -71,6 +71,7 @@ const FileTable = forwardRef((props, ref) => { const skipPageResetRef = useRef(false); const [_, copy] = useCopyToClipboard(); const { colorMode } = useContext(ThemeWrapperContext); + const [copyRow, setCopyRow] = useState(false); const tableRef = useRef(null); @@ -83,8 +84,13 @@ const FileTable = forwardRef((props, ref) => { } ); - const handleCopy = (message: string) => { - copy(message); + const handleCopy = (rowData: any) => { + const rowString = JSON.stringify(rowData, null, 2); + copy(rowString); + setCopyRow(true); + setTimeout(() => { + setCopyRow(false); + }, 5000); }; const columns = useMemo( () => [ @@ -154,18 +160,14 @@ const FileTable = forwardRef((props, ref) => { cell: (info) => { if (info.getValue() != 'Processing') { return ( - -
    - - - {info.getValue()} - - {(info.getValue() === 'Completed' || - info.getValue() === 'Failed' || - (info.getValue() === 'Cancelled' && !isReadOnlyUser)) && ( +
    + + {info.getValue()} + {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && + !isReadOnlyUser && ( ((props, ref) => { )} -
    - - {info.row.original?.status === 'Failed' && ( - - handleCopy(info.row.original?.errorMessage ?? '')} - > - - - - )} - +
    ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { return ( @@ -531,9 +515,20 @@ const FileTable = forwardRef((props, ref) => { > + handleCopy(info.row.original)} + > + + ), - header: () => View, + header: () => Actions, footer: (info) => info.column.id, }), ], From 845bfb75d9fde846f9eab9b3284bc2c0a4f20dc1 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:12:09 +0530 Subject: [PATCH 187/292] Raga's Evaluation For Multi Modes (#806) * Updatedmodels for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evalution For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar --- backend/score.py | 42 +++-- backend/src/QA_integration.py | 2 +- backend/src/ragas_eval.py | 78 ++------- .../src/components/ChatBot/ChatInfoModal.tsx | 156 ++++++++++++------ frontend/src/components/ChatBot/Chatbot.tsx | 74 +++++---- .../src/components/ChatBot/MetricsTab.tsx | 22 ++- .../components/ChatBot/MultiModeMetrics.tsx | 116 +++++++++++++ frontend/src/services/GetRagasMetric.ts | 13 +- frontend/src/types.ts | 36 +++- frontend/src/utils/Constants.ts | 15 +- frontend/src/utils/Utils.ts | 11 ++ 11 files changed, 378 insertions(+), 187 deletions(-) create mode 100644 frontend/src/components/ChatBot/MultiModeMetrics.tsx diff --git a/backend/score.py b/backend/score.py index 130cd1af2..d78a13746 100644 --- a/backend/score.py +++ b/backend/score.py @@ -786,24 +786,34 @@ async def retry_processing(uri=Form(), userName=Form(), password=Form(), databas 
gc.collect() @app.post('/metric') -async def calculate_metric(question=Form(), context=Form(), answer=Form(), model=Form()): +async def calculate_metric(question: str = Form(), + context: str = Form(), + answer: str = Form(), + model: str = Form(), + mode: str = Form()): try: - payload_json_obj = {'api_name':'metric', 'context':context, 'answer':answer, 'model':model, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") - result = await asyncio.to_thread(get_ragas_metrics, question, context, answer, model) - if result is None or "error" in result: - return create_api_response( - 'Failed', - message='Failed to calculate evaluation metrics.', - error=result.get("error", "Ragas evaluation returned null") - ) - return create_api_response('Success', data=result) + context_list = [str(item).strip() for item in json.loads(context)] if context else [] + answer_list = [str(item).strip() for item in json.loads(answer)] if answer else [] + mode_list = [str(item).strip() for item in json.loads(mode)] if mode else [] + + result = await asyncio.to_thread( + get_ragas_metrics, question, context_list, answer_list, model + ) + if result is None or "error" in result: + return create_api_response( + 'Failed', + message='Failed to calculate evaluation metrics.', + error=result.get("error", "Ragas evaluation returned null") + ) + data = {mode: {metric: result[metric][i] for metric in result} for i, mode in enumerate(mode_list)} + return create_api_response('Success', data=data) except Exception as e: - job_status = "Failed" - message = "Error while calculating evaluation metrics" - error_message = str(e) - logging.exception(f'{error_message}') - return create_api_response(job_status, message=message, error=error_message) + logging.exception(f"Error while calculating evaluation metrics: {e}") + return create_api_response( + 'Failed', + message="Error while calculating evaluation metrics", + error=str(e) + ) finally: gc.collect() diff 
--git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 27849fc20..468069531 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -435,7 +435,7 @@ def process_chat_response(messages, history, question, model, graph, document_na total_tokens = 0 formatted_docs = "" - question = transformed_question if transformed_question else question + # question = transformed_question if transformed_question else question # metrics = get_ragas_metrics(question,formatted_docs,content) # print(metrics) diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index e177b6d61..a5ce06b5b 100644 --- a/backend/src/ragas_eval.py +++ b/backend/src/ragas_eval.py @@ -1,88 +1,44 @@ import os import logging import time -from typing import Dict, Tuple, Optional -import boto3 +from src.llm import get_llm from datasets import Dataset from dotenv import load_dotenv -from langchain_anthropic import ChatAnthropic -from langchain_aws import ChatBedrock -from langchain_community.chat_models import ChatOllama -from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -from langchain_fireworks import ChatFireworks -from langchain_google_vertexai import ( - ChatVertexAI, - HarmBlockThreshold, - HarmCategory, -) -from langchain_groq import ChatGroq -from langchain_openai import AzureChatOpenAI, ChatOpenAI from ragas import evaluate -from ragas.metrics import answer_relevancy, context_utilization, faithfulness +from ragas.metrics import answer_relevancy, faithfulness from src.shared.common_fn import load_embedding_model - load_dotenv() -RAGAS_MODEL_VERSIONS = { - "openai_gpt_3.5": "gpt-3.5-turbo-16k", - "openai_gpt_4": "gpt-4-turbo-2024-04-09", - "openai_gpt_4o_mini": "gpt-4o-mini-2024-07-18", - "openai_gpt_4o": "gpt-4o-mini-2024-07-18", - "groq_llama3_70b": "groq_llama3_70b", -} EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL") EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL) - -def get_ragas_llm(model: str) 
-> Tuple[object, str]: - """Retrieves the specified language model. Improved error handling and structure.""" - env_key = f"LLM_MODEL_CONFIG_{model}" - env_value = os.environ.get(env_key) - logging.info(f"Loading model configuration: {env_key}") - try: - if "openai" in model: - model_name = RAGAS_MODEL_VERSIONS[model] - llm = ChatOpenAI( - api_key=os.environ.get("OPENAI_API_KEY"), model=model_name, temperature=0 - ) - elif "groq" in model: - model_name, base_url, api_key = env_value.split(",") - llm = ChatGroq(api_key=api_key, model_name=model_name, temperature=0) - else: - raise ValueError(f"Unsupported model for evaluation: {model}") - - logging.info(f"Model loaded - Model Version: {model}") - return llm, model_name - except (ValueError, KeyError) as e: - logging.error(f"Error loading LLM: {e}") - raise - - -def get_ragas_metrics( - question: str, context: str, answer: str, model: str -) -> Optional[Dict[str, float]]: +def get_ragas_metrics(question: str, context: list, answer: list, model: str): """Calculates RAGAS metrics.""" try: start_time = time.time() dataset = Dataset.from_dict( - {"question": [question], "answer": [answer], "contexts": [[context]]} + {"question": [question] * len(answer), "answer": answer, "contexts": [[ctx] for ctx in context]} ) - logging.info("Dataset created successfully.") - - llm, model_name = get_ragas_llm(model=model) + logging.info("Evaluation dataset created successfully.") + if ("diffbot" in model) or ("ollama" in model): + raise ValueError(f"Unsupported model for evaluation: {model}") + else: + llm, model_name = get_llm(model=model) + logging.info(f"Evaluating with model: {model_name}") - + score = evaluate( dataset=dataset, - metrics=[faithfulness, answer_relevancy, context_utilization], + metrics=[faithfulness, answer_relevancy], llm=llm, embeddings=EMBEDDING_FUNCTION, ) - + score_dict = ( - score.to_pandas()[["faithfulness", "answer_relevancy", "context_utilization"]] + score.to_pandas()[["faithfulness", 
"answer_relevancy"]] + .fillna(0) .round(4) - .to_dict(orient="records")[0] + .to_dict(orient="list") ) end_time = time.time() logging.info(f"Evaluation completed in: {end_time - start_time:.2f} seconds") @@ -90,7 +46,7 @@ def get_ragas_metrics( except ValueError as e: if "Unsupported model for evaluation" in str(e): logging.error(f"Unsupported model error: {e}") - return {"error": str(e)} # Return the specific error message as a dictionary + return {"error": str(e)} logging.exception(f"ValueError during metrics evaluation: {e}") return {"error": str(e)} except Exception as e: diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 266bf98ac..0263f00e6 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -13,7 +13,7 @@ import { import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; -import { Entity, ExtendedNode, UserCredentials, chatInfoMessage } from '../../types'; +import { ExtendedNode, UserCredentials, chatInfoMessage, multimodelmetric } from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; @@ -29,7 +29,8 @@ import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; import { Stack } from '@mui/material'; -import { capitalizeWithUnderscore } from '../../utils/Utils'; +import { capitalizeWithUnderscore, getNodes } from '../../utils/Utils'; +import MultiModeMetrics from './MultiModeMetrics'; const ChatInfoModal: React.FC = ({ sources, @@ -46,14 +47,6 @@ const ChatInfoModal: React.FC = ({ metriccontexts, metricquestion, 
metricmodel, - saveNodes, - saveChunks, - saveChatRelationships, - saveCommunities, - saveInfoEntitites, - saveMetrics, - toggleInfoLoading, - toggleMetricsLoading, nodes, chunks, infoEntities, @@ -62,6 +55,16 @@ const ChatInfoModal: React.FC = ({ relationships, infoLoading, metricsLoading, + activeChatmodes, + metricError, + saveNodes, + saveChunks, + saveChatRelationships, + saveCommunities, + saveInfoEntitites, + saveMetrics, + toggleInfoLoading, + toggleMetricsLoading, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); @@ -73,6 +76,8 @@ const ChatInfoModal: React.FC = ({ const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); + const [multiModelMetrics, setMultiModelMetrics] = useState([]); + const [multiModeError, setMultiModeError] = useState(''); console.log('node', nodeDetails); @@ -126,32 +131,11 @@ const ChatInfoModal: React.FC = ({ .filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); const communitiesData = response?.data?.data?.community_data; const chunksData = response?.data?.data?.chunk_data; - - saveInfoEntitites( - nodesData.map((n: Entity) => { - if (!n.labels.length && mode === chatModeLables.entity_vector) { - return { - ...n, - labels: ['Entity'], - }; - } - return n; - }) - ); - saveNodes( - nodesData.map((n: ExtendedNode) => { - if (!n.labels.length && mode === chatModeLables.entity_vector) { - return { - ...n, - labels: ['Entity'], - }; - } - return n ?? []; - }) - ); + saveInfoEntitites(getNodes(nodesData, mode)); + saveNodes(getNodes(nodesData, mode)); saveChatRelationships(relationshipsData ?? []); saveCommunities( - (communitiesData || []) + (communitiesData ?? 
[]) .map((community: { element_id: string }) => { const communityScore = nodeDetails?.communitydetails?.find( (c: { id: string }) => c.id === community.element_id @@ -163,7 +147,6 @@ const ChatInfoModal: React.FC = ({ }) .sort((a: any, b: any) => b.score - a.score) ); - saveChunks( chunksData .map((chunk: any) => { @@ -184,29 +167,77 @@ const ChatInfoModal: React.FC = ({ } () => { setcopiedText(false); - toggleMetricsLoading(); + if (metricsLoading) { + toggleMetricsLoading(); + } + setMultiModelMetrics([]); }; - }, [nodeDetails, mode, error]); + }, [nodeDetails, mode, error, metricsLoading]); const onChangeTabs = (tabId: number) => { setActiveTab(tabId); }; const loadMetrics = async () => { - setShowMetricsTable(true); - try { - toggleMetricsLoading(); - const response = await getChatMetrics(metricquestion, metriccontexts, metricanswer, metricmodel); - toggleMetricsLoading(); - if (response.data.status === 'Success') { - saveMetrics({ ...response.data.data, error: '' }); + if (activeChatmodes) { + if (Object.keys(activeChatmodes).length <= 1) { + setShowMetricsTable(true); + const defaultMode = Object.keys(activeChatmodes)[0]; + try { + toggleMetricsLoading(); + const response = await getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [ + defaultMode, + ]); + toggleMetricsLoading(); + if (response.data.status === 'Success') { + const data = response; + saveMetrics(data.data.data[defaultMode]); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + toggleMetricsLoading(); + console.log('Error in getting chat metrics', error); + saveMetrics({ faithfulness: 0, answer_relevancy: 0, error: error.message }); + } + } } else { - throw new Error(response.data.error); - } - } catch (error) { - if (error instanceof Error) { + setShowMetricsTable(true); toggleMetricsLoading(); - console.log('Error in getting chat metrics', error); - saveMetrics({ error: error.message, faithfulness: 0, 
answer_relevancy: 0, context_utilization: 0 }); + const contextarray = Object.values(activeChatmodes).map((r) => { + return r.metric_contexts; + }); + const answerarray = Object.values(activeChatmodes).map((r) => { + return r.metric_answer; + }); + const modesarray = Object.keys(activeChatmodes).map((mode) => { + return mode; + }); + try { + const responses = await getChatMetrics( + metricquestion, + contextarray as string[], + answerarray as string[], + metricmodel, + modesarray + ); + toggleMetricsLoading(); + if (responses.data.status === 'Success') { + const modewisedata = responses.data.data; + const metricsdata = Object.entries(modewisedata).map(([mode, scores]) => { + return { mode, answer_relevancy: scores.answer_relevancy, faithfulness: scores.faithfulness }; + }); + setMultiModelMetrics(metricsdata); + } else { + throw new Error(responses.data.error); + } + } catch (error) { + toggleMetricsLoading(); + console.log('Error in getting chat metrics', error); + if (error instanceof Error) { + setMultiModeError(error.message); + } + } } } }; @@ -303,13 +334,18 @@ const ChatInfoModal: React.FC = ({ Answer Relevancy: Determines How well the answer addresses the user's question. - - Context Utilization: Determines How effectively the system uses the - retrieved information to answer thequestion. 
- - {showMetricsTable && } + {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length > 1 && ( + + )} + {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length <= 1 && ( + + )} {!metricDetails && ( )} + {!multiModelMetrics && ( + + )} diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index a1eeb35ef..1f7049f0d 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -22,9 +22,9 @@ import { ExtendedNode, ExtendedRelationship, Messages, - MetricsState, ResponseMode, UserCredentials, + metricstate, nodeDetailsProps, } from '../../types'; import { useCredentials } from '../../context/UserCredentials'; @@ -74,7 +74,7 @@ const Chatbot: FC = (props) => { const [nodes, setNodes] = useState([]); const [relationships, setRelationships] = useState([]); const [chunks, setChunks] = useState([]); - const [metricDetails, setMetricDetails] = useState(null); + const [metricDetails, setMetricDetails] = useState(null); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); const [infoLoading, toggleInfoLoading] = useReducer((s) => !s, false); @@ -112,7 +112,7 @@ const Chatbot: FC = (props) => { const saveChunks = (chatChunks: Chunk[]) => { setChunks(chatChunks); }; - const saveMetrics = (metricInfo: MetricsState) => { + const saveMetrics = (metricInfo: metricstate) => { setMetricDetails(metricInfo); }; const saveCommunities = (chatCommunities: Community[]) => { @@ -269,15 +269,15 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId - ? { - ...msg, - modes: { - ...msg.modes, - [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, - }, - } - : msg) + (msg.id === chatbotMessageId + ? 
{ + ...msg, + modes: { + ...msg.modes, + [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, + }, + } + : msg) ) ); } @@ -290,19 +290,19 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId - ? { - ...msg, - isLoading: false, - isTyping: false, - modes: { - [chatModes[0]]: { - message: 'An error occurred while processing your request.', - error: error.message, - }, - }, - } - : msg) + (msg.id === chatbotMessageId + ? { + ...msg, + isLoading: false, + isTyping: false, + modes: { + [chatModes[0]]: { + message: 'An error occurred while processing your request.', + error: error.message, + }, + }, + } + : msg) ) ); } @@ -391,7 +391,10 @@ const Chatbot: FC = (props) => { setMetricContext(currentMode.metric_contexts ?? ''); setMetricAnswer(currentMode.metric_answer ?? ''); setActiveChat(chat); - if ((previousActiveChat != null && chat.id != previousActiveChat?.id) || (previousActiveChat != null && chat.currentMode != previousActiveChat.currentMode)) { + if ( + (previousActiveChat != null && chat.id != previousActiveChat?.id) || + (previousActiveChat != null && chat.currentMode != previousActiveChat.currentMode) + ) { setNodes([]); setChunks([]); setInfoEntities([]); @@ -450,12 +453,14 @@ const Chatbot: FC = (props) => {
    {chat.modes[chat.currentMode]?.message || ''} @@ -539,8 +544,9 @@ const Chatbot: FC = (props) => {
    = (props) => { infoEntities={infoEntities} relationships={relationships} chunks={chunks} - metricDetails={metricDetails} + metricDetails={activeChat != undefined && metricDetails != null ? metricDetails : undefined} + metricError={activeChat != undefined && metricDetails != null ? (metricDetails.error as string) : ''} communities={communities} infoLoading={infoLoading} metricsLoading={metricsLoading} @@ -641,6 +648,7 @@ const Chatbot: FC = (props) => { saveNodes={saveNodes} toggleInfoLoading={toggleInfoLoading} toggleMetricsLoading={toggleMetricsLoading} + activeChatmodes={activeChat?.modes} /> diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index 17b5e67ae..55d37db4c 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -1,5 +1,4 @@ import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; -import { MetricsState } from '../../types'; import { memo, useMemo, useRef } from 'react'; import { useReactTable, @@ -13,9 +12,16 @@ import { capitalize } from '../../utils/Utils'; function MetricsTab({ metricsLoading, metricDetails, + error, }: { metricsLoading: boolean; - metricDetails: MetricsState | null; + metricDetails: + | { + faithfulness: number; + answer_relevancy: number; + } + | undefined; + error: string; }) { const columnHelper = createColumnHelper<{ metric: string; score: number }>(); const tableRef = useRef(null); @@ -53,11 +59,9 @@ function MetricsTab({ const table = useReactTable({ data: metricDetails != null && !metricsLoading - ? Object.entries(metricDetails) - .slice(0, Object.keys(metricDetails).length - 1) - .map(([key, value]) => { - return { metric: key, score: value }; - }) + ? 
Object.entries(metricDetails).map(([key, value]) => { + return { metric: key, score: value }; + }) : [], columns, getCoreRowModel: getCoreRowModel(), @@ -72,8 +76,8 @@ function MetricsTab({ }); return ( - {metricDetails != null && metricDetails?.error?.trim() != '' ? ( - {metricDetails?.error} + {error != undefined && error?.trim() != '' ? ( + {error} ) : ( (null); + + const columnHelper = createColumnHelper(); + const columns = useMemo( + () => [ + columnHelper.accessor((row) => row.mode, { + id: 'Mode', + cell: (info) => { + const metric = info.getValue(); + const capitilizedMetric = metric.includes('_') + ? metric + .split('_') + .map((w) => capitalize(w)) + .join(' ') + : capitalize(metric); + return ( +
    + {capitilizedMetric} +
    + ); + }, + header: () => Mode, + footer: (info) => info.column.id, + }), + columnHelper.accessor((row) => row.answer_relevancy, { + id: 'Answer Relevancy', + cell: (info) => { + return {info.getValue().toFixed(2)}; + }, + header: () => Answer Relevancy, + }), + columnHelper.accessor((row) => row.faithfulness, { + id: 'Score', + cell: (info) => { + return {info.getValue().toFixed(2)}; + }, + header: () => Faithfulness, + }), + ], + [] + ); + const table = useReactTable({ + data, + columns, + getCoreRowModel: getCoreRowModel(), + getFilteredRowModel: getFilteredRowModel(), + getPaginationRowModel: getPaginationRowModel(), + enableGlobalFilter: false, + autoResetPageIndex: false, + enableRowSelection: true, + enableMultiRowSelection: true, + enableSorting: true, + getSortedRowModel: getSortedRowModel(), + }); + return ( + + {error?.trim() != '' ? ( + {error} + ) : ( + , + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + /> + )} + + ); +} diff --git a/frontend/src/services/GetRagasMetric.ts b/frontend/src/services/GetRagasMetric.ts index a72c69e5d..90e365d00 100644 --- a/frontend/src/services/GetRagasMetric.ts +++ b/frontend/src/services/GetRagasMetric.ts @@ -1,12 +1,19 @@ import { MetricsResponse } from '../types'; import api from '../API/Index'; -export const getChatMetrics = async (question: string, context: string, answer: string, model: string) => { +export const getChatMetrics = async ( + question: string, + context: string[], + answer: string[], + model: string, + mode: string[] +) => { const formData = new FormData(); formData.append('question', question); - formData.append('context', `[${context}]`); - formData.append('answer', answer); + formData.append('context', JSON.stringify(context)); + formData.append('answer', JSON.stringify(answer)); formData.append('model', model); + formData.append('mode', JSON.stringify(mode)); try { const response = await api.post(`/metric`, formData); return response; 
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 6e33515d9..39f9a041c 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -410,11 +410,18 @@ export interface duplicateNodesData extends Partial { export interface OrphanNodeResponse extends Partial { data: orphanNodeProps[]; } -export type metricdetails = { +export type metricstate = { faithfulness: number; answer_relevancy: number; - context_utilization: number; + error?: string; }; +export type metricdetails = Record; + +export interface multimodelmetric { + mode: string; + answer_relevancy: number; + faithfulness: number; +} export interface MetricsResponse extends Omit { data: metricdetails; } @@ -430,10 +437,6 @@ export interface SourceListServerData { message?: string; } -export interface MetricsState extends metricdetails { - error?: string; -} - export interface chatInfoMessage extends Partial { sources: string[]; model: string; @@ -452,16 +455,27 @@ export interface chatInfoMessage extends Partial { nodes: ExtendedNode[]; relationships: ExtendedRelationship[]; chunks: Chunk[]; - metricDetails: MetricsState | null; + metricDetails: + | { + faithfulness: number; + answer_relevancy: number; + } + | undefined; + metricError: string; infoEntities: Entity[]; communities: Community[]; infoLoading: boolean; metricsLoading: boolean; + activeChatmodes: + | { + [key: string]: ResponseMode; + } + | undefined; saveInfoEntitites: (entities: Entity[]) => void; saveNodes: (chatNodes: ExtendedNode[]) => void; saveChatRelationships: (chatRels: ExtendedRelationship[]) => void; saveChunks: (chatChunks: Chunk[]) => void; - saveMetrics: (metricInfo: MetricsState) => void; + saveMetrics: (metricInfo: metricstate) => void; saveCommunities: (chatCommunities: Community[]) => void; toggleInfoLoading: React.DispatchWithoutAction; toggleMetricsLoading: React.DispatchWithoutAction; @@ -825,6 +839,11 @@ export type GraphPropertiesPanelProps = { newScheme: Scheme; }; + +export type withId = { + id: string; +}; 
+ export interface GraphViewHandlerProps { nodeValues?: ExtendedNode[]; relationshipValues?: ExtendedRelationship[]; @@ -836,3 +855,4 @@ export interface GraphViewHandlerProps { entityInfo?: Entity[]; mode?: string; } + diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index bd5890c91..c67ef5b27 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -32,7 +32,20 @@ export const defaultLLM = llms?.includes('openai_gpt_4o') : llms?.includes('gemini_1.5_pro') ? 'gemini_1.5_pro' : 'diffbot'; -export const supportedLLmsForRagas = ['openai_gpt_3.5', 'openai_gpt_4o', 'openai_gpt_4o_mini', 'groq_llama3_70b']; +export const supportedLLmsForRagas = [ + 'openai_gpt_3.5', + 'openai_gpt_4', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3_70b', + 'bedrock_claude_3_5_sonnet', +]; export const chatModeLables = { vector: 'vector', graph: 'graph', diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 27645ac7e..5727f33ea 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -509,3 +509,14 @@ export function downloadClickHandler( downloadLinkRef.current.click(); } } +export function getNodes(nodesData: Array, mode: string) { + return nodesData.map((n) => { + if (!n.labels.length && mode === chatModeLables.entity_vector) { + return { + ...n, + labels: ['Entity'], + }; + } + return n; + }); +} From 952291d13a910fb0cb5a14bd6d433b90ad0eed99 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:53:30 +0000 Subject: [PATCH 188/292] lint fixes --- frontend/src/components/ChatBot/ChunkInfo.tsx | 2 +- frontend/src/components/FileTable.tsx | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx 
b/frontend/src/components/ChatBot/ChunkInfo.tsx index 7f31cd4d5..fa8496581 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -22,7 +22,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [loadingGraphView, setLoadingGraphView] = useState(false); + const [_, setLoadingGraphView] = useState(false); const handleChunkClick = async (elementId: string, viewMode: string) => { handleGraphNodeClick( diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index ad0f7bc19..6289c1adf 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -7,7 +7,6 @@ import { ProgressBar, StatusIndicator, TextLink, - Tip, Typography, useCopyToClipboard, } from '@neo4j-ndl/react'; From f5a5eddf8865eb0e1f461105a1c41dbaeb689361 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 21 Oct 2024 05:04:27 +0000 Subject: [PATCH 189/292] fix: multimode metrics state handling fix: lint fixes --- .../src/components/ChatBot/ChatInfoModal.tsx | 18 +++--- frontend/src/components/ChatBot/Chatbot.tsx | 10 +++ frontend/src/components/ChatBot/ChunkInfo.tsx | 2 +- .../src/components/ChatBot/EntitiesInfo.tsx | 2 +- .../src/components/ChatBot/SourcesInfo.tsx | 8 +-- frontend/src/components/ChatBot/chatInfo.ts | 64 +++++++++---------- frontend/src/components/Dropdown.tsx | 4 +- .../components/Graph/GraphPropertiesTable.tsx | 1 - .../Deduplication/index.tsx | 2 +- .../EntityExtractionSetting.tsx | 10 +-- frontend/src/types.ts | 4 +- frontend/src/utils/Constants.ts | 4 +- frontend/src/utils/Utils.ts | 1 - 13 files changed, 70 insertions(+), 60 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx 
b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 0263f00e6..a8e612ca5 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -13,7 +13,7 @@ import { import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; -import { ExtendedNode, UserCredentials, chatInfoMessage, multimodelmetric } from '../../types'; +import { ExtendedNode, UserCredentials, chatInfoMessage } from '../../types'; import { useContext, useEffect, useMemo, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; @@ -57,6 +57,7 @@ const ChatInfoModal: React.FC = ({ metricsLoading, activeChatmodes, metricError, + multiModelMetrics, saveNodes, saveChunks, saveChatRelationships, @@ -65,6 +66,7 @@ const ChatInfoModal: React.FC = ({ saveMetrics, toggleInfoLoading, toggleMetricsLoading, + saveMultimodemetrics, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); @@ -76,11 +78,8 @@ const ChatInfoModal: React.FC = ({ const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); - const [multiModelMetrics, setMultiModelMetrics] = useState([]); const [multiModeError, setMultiModeError] = useState(''); - console.log('node', nodeDetails); - const actions: CypherCodeBlockProps['actions'] = useMemo( () => [ { @@ -170,7 +169,6 @@ const ChatInfoModal: React.FC = ({ if (metricsLoading) { toggleMetricsLoading(); } - setMultiModelMetrics([]); }; }, [nodeDetails, mode, error, metricsLoading]); @@ -181,7 +179,7 @@ const ChatInfoModal: React.FC = ({ if (activeChatmodes) { if 
(Object.keys(activeChatmodes).length <= 1) { setShowMetricsTable(true); - const defaultMode = Object.keys(activeChatmodes)[0]; + const [defaultMode] = Object.keys(activeChatmodes); try { toggleMetricsLoading(); const response = await getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [ @@ -227,7 +225,7 @@ const ChatInfoModal: React.FC = ({ const metricsdata = Object.entries(modewisedata).map(([mode, scores]) => { return { mode, answer_relevancy: scores.answer_relevancy, faithfulness: scores.faithfulness }; }); - setMultiModelMetrics(metricsdata); + saveMultimodemetrics(metricsdata); } else { throw new Error(responses.data.error); } @@ -346,7 +344,7 @@ const ChatInfoModal: React.FC = ({ {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length <= 1 && ( )} - {!metricDetails && ( + {!metricDetails && activeChatmodes != undefined && Object.keys(activeChatmodes).length <= 1 && ( )} - {!multiModelMetrics && ( + {!multiModelMetrics.length && activeChatmodes != undefined && Object.keys(activeChatmodes).length > 1 && ( )} diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 1f7049f0d..3c1992645 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -25,6 +25,7 @@ import { ResponseMode, UserCredentials, metricstate, + multimodelmetric, nodeDetailsProps, } from '../../types'; import { useCredentials } from '../../context/UserCredentials'; @@ -81,6 +82,7 @@ const Chatbot: FC = (props) => { const [metricsLoading, toggleMetricsLoading] = useReducer((s) => !s, false); const downloadLinkRef = useRef(null); const [activeChat, setActiveChat] = useState(null); + const [multiModelMetrics, setMultiModelMetrics] = useState([]); const [_, copy] = useCopyToClipboard(); const { speak, cancel, speaking } = useSpeechSynthesis({ @@ -112,6 +114,9 @@ const Chatbot: FC = (props) => { const saveChunks = (chatChunks: Chunk[]) => { 
setChunks(chatChunks); }; + const saveMultimodemetrics = (metrics: multimodelmetric[]) => { + setMultiModelMetrics(metrics); + }; const saveMetrics = (metricInfo: metricstate) => { setMetricDetails(metricInfo); }; @@ -400,6 +405,9 @@ const Chatbot: FC = (props) => { setInfoEntities([]); setMetricDetails(null); } + if (previousActiveChat != null && chat.id != previousActiveChat?.id) { + setMultiModelMetrics([]); + } }, []); const speechHandler = useCallback((chat: Messages) => { @@ -648,7 +656,9 @@ const Chatbot: FC = (props) => { saveNodes={saveNodes} toggleInfoLoading={toggleInfoLoading} toggleMetricsLoading={toggleMetricsLoading} + saveMultimodemetrics={saveMultimodemetrics} activeChatmodes={activeChat?.modes} + multiModelMetrics={multiModelMetrics} /> diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index fa8496581..17083fe1e 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -24,7 +24,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [viewPoint, setViewPoint] = useState(''); const [_, setLoadingGraphView] = useState(false); - const handleChunkClick = async (elementId: string, viewMode: string) => { + const handleChunkClick = (elementId: string, viewMode: string) => { handleGraphNodeClick( userCredentials as UserCredentials, elementId, diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 2b1ff3cca..80e4fdafa 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -43,7 +43,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); }, [labelCounts]); - const handleEntityClick = async (elementId: string, viewMode: string) => { + const handleEntityClick = (elementId: string, viewMode: string) => { 
handleGraphNodeClick( userCredentials as UserCredentials, elementId, diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index 79d710fa2..35d76aefb 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -12,14 +12,14 @@ import s3logo from '../../assets/images/s3logo.png'; const filterUniqueChunks = (chunks: Chunk[]) => { const chunkSource = new Set(); - return chunks.filter(chunk => { + return chunks.filter((chunk) => { const sourceCheck = `${chunk.fileName}-${chunk.fileSource}`; if (chunkSource.has(sourceCheck)) { return false; - } else { + } chunkSource.add(sourceCheck); return true; - } + }); }; @@ -147,4 +147,4 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { ); }; -export default SourcesInfo; \ No newline at end of file +export default SourcesInfo; diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index 821cf69f8..c7e990ae7 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -2,39 +2,39 @@ import { getNeighbors } from '../../services/GraphQuery'; import { NeoNode, NeoRelationship, UserCredentials } from '../../types'; export const handleGraphNodeClick = async ( - userCredentials: UserCredentials, - elementId: string, - viewMode: string, - setNeoNodes: React.Dispatch>, - setNeoRels: React.Dispatch>, - setOpenGraphView: React.Dispatch>, - setViewPoint: React.Dispatch>, - setLoadingGraphView?: React.Dispatch> + userCredentials: UserCredentials, + elementId: string, + viewMode: string, + setNeoNodes: React.Dispatch>, + setNeoRels: React.Dispatch>, + setOpenGraphView: React.Dispatch>, + setViewPoint: React.Dispatch>, + setLoadingGraphView?: React.Dispatch> ) => { - if (setLoadingGraphView) { - setLoadingGraphView(true); + if (setLoadingGraphView) { + setLoadingGraphView(true); + } + try { + const result = await 
getNeighbors(userCredentials, elementId); + if (result && result.data.data.nodes.length > 0) { + let { nodes } = result.data.data; + if (viewMode === 'Chunk') { + nodes = nodes.filter((node: NeoNode) => node.labels.length === 1 && node.properties.id !== null); + } + const nodeIds = new Set(nodes.map((node: NeoNode) => node.element_id)); + const relationships = result.data.data.relationships.filter( + (rel: NeoRelationship) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id) + ); + setNeoNodes(nodes); + setNeoRels(relationships); + setOpenGraphView(true); + setViewPoint('chatInfoView'); } - try { - const result = await getNeighbors(userCredentials, elementId); - if (result && result.data.data.nodes.length > 0) { - let { nodes } = result.data.data; - if (viewMode === 'Chunk') { - nodes = nodes.filter((node: NeoNode) => node.labels.length === 1 && node.properties.id !== null); - } - const nodeIds = new Set(nodes.map((node: NeoNode) => node.element_id)); - const relationships = result.data.data.relationships.filter( - (rel: NeoRelationship) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id) - ); - setNeoNodes(nodes); - setNeoRels(relationships); - setOpenGraphView(true); - setViewPoint('chatInfoView'); - } - } catch (error: any) { - console.error('Error fetching neighbors:', error); - } finally { - if (setLoadingGraphView) { - setLoadingGraphView(false); - } + } catch (error: any) { + console.error('Error fetching neighbors:', error); + } finally { + if (setLoadingGraphView) { + setLoadingGraphView(false); } + } }; diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 48188f38b..087e0d71c 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -39,7 +39,9 @@ const DropdownComponent: React.FC = ({ }; }), placeholder: placeholder || 'Select an option', - defaultValue: defaultValue ? 
{ label: capitalize(defaultValue), value: defaultValue } : undefined, + defaultValue: defaultValue + ? { label: capitalizeWithUnderscore(defaultValue), value: defaultValue } + : undefined, menuPlacement: 'auto', isDisabled: isDisabled, value: value, diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index 7600ccd35..fa270455b 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -2,7 +2,6 @@ import { GraphLabel, Typography } from '@neo4j-ndl/react'; import { GraphPropertiesTableProps } from '../../types'; const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps): JSX.Element => { - console.log('props', propertiesWithTypes); return (
    diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 05facb141..420fbe590 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -106,7 +106,7 @@ export default function DeduplicationTab() { }); }; - const handleDuplicateNodeClick = async (elementId: string, viewMode: string) => { + const handleDuplicateNodeClick = (elementId: string, viewMode: string) => { handleGraphNodeClick( userCredentials as UserCredentials, elementId, diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index e5e0bc033..83081dd93 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -317,8 +317,9 @@ export default function EntityExtractionSetting({ options: nodeLabelOptions, onChange: onChangenodes, value: selectedNodes, - classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' - }`, + classNamePrefix: `${ + isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' + }`, }} type='creatable' /> @@ -332,8 +333,9 @@ export default function EntityExtractionSetting({ options: relationshipTypeOptions, onChange: onChangerels, value: selectedRels, - classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' - }`, + classNamePrefix: `${ + isTablet ? 
'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' + }`, }} type='creatable' /> diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 39f9a041c..c43ee5020 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -471,6 +471,7 @@ export interface chatInfoMessage extends Partial { [key: string]: ResponseMode; } | undefined; + multiModelMetrics: multimodelmetric[]; saveInfoEntitites: (entities: Entity[]) => void; saveNodes: (chatNodes: ExtendedNode[]) => void; saveChatRelationships: (chatRels: ExtendedRelationship[]) => void; @@ -479,6 +480,7 @@ export interface chatInfoMessage extends Partial { saveCommunities: (chatCommunities: Community[]) => void; toggleInfoLoading: React.DispatchWithoutAction; toggleMetricsLoading: React.DispatchWithoutAction; + saveMultimodemetrics: (metrics: multimodelmetric[]) => void; } export interface eventResponsetypes extends Omit { @@ -839,7 +841,6 @@ export type GraphPropertiesPanelProps = { newScheme: Scheme; }; - export type withId = { id: string; }; @@ -855,4 +856,3 @@ export interface GraphViewHandlerProps { entityInfo?: Entity[]; mode?: string; } - diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index c67ef5b27..8ab428999 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -125,7 +125,7 @@ export const tooltips = { clearChat: 'Clear Chat History', continue: 'Continue', clearGraphSettings: 'Clear configured Graph Schema', - applySettings:'Apply Graph Schema' + applySettings: 'Apply Graph Schema', }; export const buttonCaptions = { @@ -149,7 +149,7 @@ export const buttonCaptions = { continueSettings: 'Continue', clearSettings: 'Clear Schema', ask: 'Ask', - applyGraphSchema:'Apply' + applyGraphSchema: 'Apply', }; export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 5727f33ea..226fd03f9 100644 --- 
a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -332,7 +332,6 @@ export const filterData = ( filteredNodes = allNodes; filteredRelations = allRelationships; filteredScheme = scheme; - console.log('entity', filteredScheme); } return { filteredNodes, filteredRelations, filteredScheme }; }; From b3f1dd0f78f0b53329e3332762e3989f61254eb7 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 21 Oct 2024 05:45:36 +0000 Subject: [PATCH 190/292] fix: Multimode metrics mode change state issue fix: chunk list style issue --- frontend/src/components/ChatBot/ChatInfoModal.tsx | 5 +++-- frontend/src/components/ChatBot/ChunkInfo.tsx | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index a8e612ca5..757503fb1 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -78,6 +78,7 @@ const ChatInfoModal: React.FC = ({ const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); + const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)) const [multiModeError, setMultiModeError] = useState(''); const actions: CypherCodeBlockProps['actions'] = useMemo( @@ -200,7 +201,7 @@ const ChatInfoModal: React.FC = ({ } } } else { - setShowMetricsTable(true); + setShowMultiModeMetrics(true) toggleMetricsLoading(); const contextarray = Object.values(activeChatmodes).map((r) => { return r.metric_contexts; @@ -334,7 +335,7 @@ const ChatInfoModal: React.FC = ({ - {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length > 1 && ( + {showMultiModeMetrics && activeChatmodes != null && Object.keys(activeChatmodes).length > 1 && ( = ({ loading, chunks, mode }) => { ) : 
chunks?.length > 0 ? (
    -
      +
        {chunks.map((chunk) => (
      • {chunk?.page_number ? ( From fb5e000ef36e36531418a0f82c15199a9666fc2c Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 21 Oct 2024 05:54:25 +0000 Subject: [PATCH 191/292] fix: list style fix --- frontend/src/components/ChatBot/CommunitiesInfo.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index f562a4ed4..1a769a1f3 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -36,7 +36,7 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ) : communities?.length > 0 ? (
        -
          +
            {communities.map((community, index) => (
          • From fd224a1478e5f98760d6a84ff0e8c9c2bcda93e6 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:48:13 +0000 Subject: [PATCH 192/292] Correct TYPO mistake --- backend/score.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/score.py b/backend/score.py index d78a13746..46c4ba137 100644 --- a/backend/score.py +++ b/backend/score.py @@ -291,10 +291,10 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database if "enable_communities" in tasks: await asyncio.to_thread(create_communities, uri, userName, password, database) - josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + json_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'created communities') - logger.log_struct(josn_obj) + logger.log_struct(json_obj) return create_api_response('Success', message='All tasks completed successfully') except Exception as e: From cb77c187c6274f0b6ec0740ed7ada39fc025ae30 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:49:09 +0000 Subject: [PATCH 193/292] added new env for ragas embedding model --- backend/src/ragas_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index a5ce06b5b..8052cb9a2 100644 --- a/backend/src/ragas_eval.py +++ b/backend/src/ragas_eval.py @@ -9,7 +9,7 @@ from src.shared.common_fn import load_embedding_model load_dotenv() -EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL") +EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL") EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL) def get_ragas_metrics(question: str, context: list, answer: list, model: str): From 
5c0081e28049376d99cba0a7ae24541963c0c322 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:45:47 +0530 Subject: [PATCH 194/292] Props name changes (#811) * Props name changes * removed the accesstoken from row on copy action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> --- frontend/src/components/ChatBot/ChunkInfo.tsx | 172 +++++++++--------- frontend/src/components/Content.tsx | 10 +- .../components/DataSources/AWS/S3Modal.tsx | 14 +- .../components/DataSources/GCS/GCSModal.tsx | 18 +- .../components/DataSources/Local/DropZone.tsx | 16 +- .../Local/DropZoneForSmallLayouts.tsx | 16 +- frontend/src/components/FileTable.tsx | 32 ++-- .../EntityExtractionSetting.tsx | 26 ++- .../Popups/GraphEnhancementDialog/index.tsx | 2 +- frontend/src/hooks/useSourceInput.tsx | 14 +- frontend/src/types.ts | 24 +-- 11 files changed, 175 insertions(+), 169 deletions(-) diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 0ebe12f97..b008a30cf 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -1,7 +1,7 @@ import { FC, useContext, useState } from 'react'; import { ChunkProps, UserCredentials } from '../../types'; import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; -import { DocumentTextIconOutline, GlobeAltIconOutline, MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; +import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; import wikipedialogo from '../../assets/images/wikipedia.svg'; import youtubelogo from '../../assets/images/youtube.svg'; import gcslogo from '../../assets/images/gcs.webp'; @@ -13,7 +13,6 @@ import { chatModeLables } from '../../utils/Constants'; import { useCredentials } from 
'../../context/UserCredentials'; import GraphViewModal from '../Graph/GraphViewModal'; import { handleGraphNodeClick } from './chatInfo'; -import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; const ChunkInfo: FC = ({ loading, chunks, mode }) => { const themeUtils = useContext(ThemeWrapperContext); @@ -24,7 +23,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [viewPoint, setViewPoint] = useState(''); const [_, setLoadingGraphView] = useState(false); - const handleChunkClick = (elementId: string, viewMode: string) => { + const handleChunkClick = (elementId: string, viewMode: string) => { handleGraphNodeClick( userCredentials as UserCredentials, elementId, @@ -52,16 +51,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { <>
            <> - handleChunkClick(chunk.element_id, 'Chunk')} - > - - = ({ loading, chunks, mode }) => {
            Page: {chunk?.page_number}
            +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            ) : chunk?.url && chunk?.start_time ? ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - = ({ loading, chunks, mode }) => { {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} + <> + Similarity Score: {chunk?.score} +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            + )} ) : chunk?.url && new URL(chunk.url).host === 'wikipedia.org' ? ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - {chunk?.fileName}
            {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} + <> + Similarity Score: {chunk?.score} +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            + )} ) : chunk?.url && new URL(chunk.url).host === 'storage.googleapis.com' ? ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - {chunk?.fileName}
            {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} + <> + Similarity Score: {chunk?.score} +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            + )} ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - {chunk?.fileName}
            {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} + <> + Similarity Score: {chunk?.score} +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            + )} ) : chunk?.url && @@ -181,16 +183,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { !isAllowedHost(chunk?.url, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) ? ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - {chunk?.url} @@ -199,22 +191,23 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {mode !== chatModeLables.global_vector && mode !== chatModeLables.entity_vector && mode !== chatModeLables.graph && ( - Similarity Score: {chunk?.score} + <> + Similarity Score: {chunk?.score} +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            + )} ) : ( <>
            - handleChunkClick(chunk.element_id, 'Chunk')} - > - - {chunk.fileSource === 'local file' ? ( ) : ( @@ -225,12 +218,23 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className='mr-2' /> )} - - {chunk.fileName} - + <> + + {chunk.fileName} + +
            + handleChunkClick(chunk.element_id, 'Chunk')} + >{'Graph'} + +
            +
            )} diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 3eeace857..eae9caa77 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -308,7 +308,7 @@ const Content: React.FC = ({ userCredentials as UserCredentials, fileItem.fileSource, fileItem.retryOption ?? '', - fileItem.source_url, + fileItem.sourceUrl, localStorage.getItem('accesskey'), localStorage.getItem('secretkey'), fileItem.name ?? '', @@ -316,9 +316,9 @@ const Content: React.FC = ({ fileItem.gcsBucketFolder ?? '', selectedNodes.map((l) => l.value), selectedRels.map((t) => t.value), - fileItem.google_project_id, + fileItem.googleProjectId, fileItem.language, - fileItem.access_token + fileItem.accessToken ); if (apiResponse?.status === 'Failed') { @@ -571,8 +571,8 @@ const Content: React.FC = ({ ...f, status: 'Reprocess', processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.NodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, + NodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipCount: isStartFromBegining ? 
0 : f.relationshipsCount, } : f; }); diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index 221fb0c9c..69ac6e7bd 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -30,10 +30,10 @@ const S3Modal: React.FC = ({ hideModal, open }) => { const submitHandler = async (url: string) => { const defaultValues: CustomFileBase = { - processing: 0, + processingTotalTime: 0, status: 'New', - NodesCount: 0, - relationshipCount: 0, + nodesCount: 0, + relationshipsCount: 0, type: 'PDF', model: model, fileSource: 's3 bucket', @@ -81,7 +81,7 @@ const S3Modal: React.FC = ({ hideModal, open }) => { copiedFilesData.unshift({ name: item.fileName, size: item.fileSize, - source_url: item.url, + sourceUrl: item.url, // total_pages: 'N/A', id: uuidv4(), ...defaultValues, @@ -92,9 +92,9 @@ const S3Modal: React.FC = ({ hideModal, open }) => { copiedFilesData.unshift({ ...tempFileData, status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, + nodesCount: defaultValues.nodesCount, + relationshipsCount: defaultValues.relationshipsCount, + processingTotalTime: defaultValues.processingTotalTime, model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index 007ffab2b..039934535 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -23,10 +23,10 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => const { setFilesData, model, filesData } = useFileContext(); const defaultValues: CustomFileBase = { - processing: 0, + processingTotalTime: 0, status: 'New', - NodesCount: 0, - 
relationshipCount: 0, + nodesCount: 0, + relationshipsCount: 0, type: 'TEXT', model: model, fileSource: 'gcs bucket', @@ -101,9 +101,9 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => size: item.fileSize ?? 0, gcsBucket: item.gcsBucketName, gcsBucketFolder: item.gcsBucketFolder, - google_project_id: item.gcsProjectId, + googleProjectId: item.gcsProjectId, id: uuidv4(), - access_token: codeResponse.access_token, + accessToken: codeResponse.access_token, ...defaultValues, }); } else { @@ -112,13 +112,13 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => copiedFilesData.unshift({ ...tempFileData, status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, + nodesCount: defaultValues.nodesCount, + relationshipsCount: defaultValues.relationshipsCount, + processingTotalTime: defaultValues.processingTotalTime, model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, - access_token: codeResponse.access_token, + accessToken: codeResponse.access_token, }); } } diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index 0278e679e..bddbb65f0 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -24,13 +24,13 @@ const DropZone: FunctionComponent = () => { setIsLoading(false); if (f.length) { const defaultValues: CustomFileBase = { - processing: 0, + processingTotalTime: 0, status: 'None', - NodesCount: 0, - relationshipCount: 0, + nodesCount: 0, + relationshipsCount: 0, model: model, fileSource: 'local file', - uploadprogess: 0, + uploadProgress: 0, processingProgress: undefined, retryOptionStatus: false, retryOption: '', @@ -46,7 +46,7 @@ const DropZone: FunctionComponent = () => { // @ts-ignore type: 
`${file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length).toUpperCase()}`, size: file.size, - uploadprogess: file.size && file?.size < chunkSize ? 100 : 0, + uploadProgress: file.size && file?.size < chunkSize ? 100 : 0, id: uuidv4(), ...defaultValues, }); @@ -56,9 +56,9 @@ const DropZone: FunctionComponent = () => { copiedFilesData.unshift({ ...tempFileData, status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, + nodesCount: defaultValues.nodesCount, + relationshipsCount: defaultValues.relationshipsCount, + processingTotalTime: defaultValues.processingTotalTime, model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 17c97d0bc..d7fb1e56d 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -170,13 +170,13 @@ export default function DropZoneForSmallLayouts() { setIsLoading(false); if (f.length) { const defaultValues: CustomFileBase = { - processing: 0, + processingTotalTime: 0, status: 'None', - NodesCount: 0, - relationshipCount: 0, + nodesCount: 0, + relationshipsCount: 0, model: model, fileSource: 'local file', - uploadprogess: 0, + uploadProgress: 0, processingProgress: undefined, retryOption: '', retryOptionStatus: false, @@ -192,7 +192,7 @@ export default function DropZoneForSmallLayouts() { // @ts-ignore type: `${file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length).toUpperCase()}`, size: file.size, - uploadprogess: file.size && file?.size < chunkSize ? 100 : 0, + uploadProgress: file.size && file?.size < chunkSize ? 
100 : 0, id: uuidv4(), ...defaultValues, }); @@ -202,9 +202,9 @@ export default function DropZoneForSmallLayouts() { copiedFilesData.unshift({ ...tempFileData, status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, + nodesCount: defaultValues.nodesCount, + relationshipsCount: defaultValues.relationshipsCount, + processingTotalTime: defaultValues.processingTotalTime, model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 6289c1adf..2032afd28 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -141,8 +141,8 @@ const FileTable = forwardRef((props, ref) => {
            @@ -307,7 +307,7 @@ const FileTable = forwardRef((props, ref) => { }, }, }), - columnHelper.accessor((row) => row.uploadprogess, { + columnHelper.accessor((row) => row.uploadProgress, { id: 'uploadprogess', cell: (info: CellContext) => { if (parseInt(info.getValue()) === 100 || info.row.original?.status === 'New') { @@ -354,7 +354,7 @@ const FileTable = forwardRef((props, ref) => { return ( - + {info.row.original.fileSource} @@ -487,13 +487,13 @@ const FileTable = forwardRef((props, ref) => { }, }, }), - columnHelper.accessor((row) => row.NodesCount, { + columnHelper.accessor((row) => row.nodesCount, { id: 'NodesCount', cell: (info) => {info.getValue()}, header: () => Nodes, footer: (info) => info.column.id, }), - columnHelper.accessor((row) => row.relationshipCount, { + columnHelper.accessor((row) => row.relationshipsCount, { id: 'relationshipCount', cell: (info) => {info.getValue()}, header: () => Relations, @@ -521,7 +521,11 @@ const FileTable = forwardRef((props, ref) => { label='Copy Row' disabled={info.getValue() === 'Uploading'} clean - onClick={() => handleCopy(info.row.original)} + onClick={() => { + const copied={...info.row.original}; + delete copied.accessToken; + handleCopy(copied); + }} > @@ -661,19 +665,19 @@ const FileTable = forwardRef((props, ref) => { type: item?.fileType?.includes('.') ? item?.fileType?.substring(1)?.toUpperCase() ?? 'None' : item?.fileType?.toUpperCase() ?? 'None', - NodesCount: item?.nodeCount ?? 0, - processing: item?.processingTime ?? 'None', - relationshipCount: item?.relationshipCount ?? 0, + nodesCount: item?.nodeCount ?? 0, + processingTotalTime: item?.processingTime ?? 'None', + relationshipsCount: item?.relationshipCount ?? 0, status: waitingFile ? 'Waiting' : getFileSourceStatus(item), model: item?.model ?? model, id: !waitingFile ? uuidv4() : waitingFile.id, - source_url: item?.url != 'None' && item?.url != '' ? item.url : '', + sourceUrl: item?.url != 'None' && item?.url != '' ? 
item.url : '', fileSource: item?.fileSource ?? 'None', gcsBucket: item?.gcsBucket, gcsBucketFolder: item?.gcsBucketFolder, errorMessage: item?.errorMessage, - uploadprogess: item?.uploadprogress ?? 0, - google_project_id: item?.gcsProjectId, + uploadProgress: item?.uploadprogress ?? 0, + googleProjectId: item?.gcsProjectId, language: item?.language ?? '', processingProgress: item?.processed_chunk != undefined && @@ -681,7 +685,7 @@ const FileTable = forwardRef((props, ref) => { !isNaN(Math.floor((item?.processed_chunk / item?.total_chunks) * 100)) ? Math.floor((item?.processed_chunk / item?.total_chunks) * 100) : undefined, - access_token: item?.access_token ?? '', + accessToken: item?.accessToken ?? '', retryOption: item.retry_condition ?? '', retryOptionStatus: false, }); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 83081dd93..67dd10f9e 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -18,7 +18,7 @@ export default function EntityExtractionSetting({ openTextSchema, settingView, onContinue, - colseEnhanceGraphSchemaDialog, + closeEnhanceGraphSchemaDialog, }: { view: 'Dialog' | 'Tabs'; open?: boolean; @@ -26,7 +26,7 @@ export default function EntityExtractionSetting({ openTextSchema: () => void; settingView: 'contentView' | 'headerView'; onContinue?: () => void; - colseEnhanceGraphSchemaDialog?: () => void; + closeEnhanceGraphSchemaDialog?: () => void; }) { const { breakpoints } = tokens; const { @@ -240,16 +240,16 @@ export default function EntityExtractionSetting({ ); localStorage.setItem('selectedSchemas', JSON.stringify({ db: userCredentials?.uri, selectedOptions: [] })); showNormalToast(`Successfully Removed the Schema 
settings`); - if (view === 'Dialog' && onClose != undefined) { - onClose(); + if (view === 'Tabs' && closeEnhanceGraphSchemaDialog != undefined) { + closeEnhanceGraphSchemaDialog(); } }; const handleApply = () => { setIsSchema(true); localStorage.setItem('isSchema', JSON.stringify(true)); showNormalToast(`Successfully Applied the Schema settings`); - if (view === 'Dialog' && onClose != undefined) { - onClose(); + if (view === 'Tabs' && closeEnhanceGraphSchemaDialog != undefined) { + closeEnhanceGraphSchemaDialog(); } localStorage.setItem( 'selectedNodeLabels', @@ -317,9 +317,8 @@ export default function EntityExtractionSetting({ options: nodeLabelOptions, onChange: onChangenodes, value: selectedNodes, - classNamePrefix: `${ - isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' - }`, + classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' + }`, }} type='creatable' /> @@ -333,9 +332,8 @@ export default function EntityExtractionSetting({ options: relationshipTypeOptions, onChange: onChangerels, value: selectedRels, - classNamePrefix: `${ - isTablet ? 'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' - }`, + classNamePrefix: `${isTablet ? 
'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' + }`, }} type='creatable' /> @@ -363,8 +361,8 @@ export default function EntityExtractionSetting({ if (view === 'Dialog' && onClose != undefined) { onClose(); } - if (view === 'Tabs' && colseEnhanceGraphSchemaDialog != undefined) { - colseEnhanceGraphSchemaDialog(); + if (view === 'Tabs' && closeEnhanceGraphSchemaDialog != undefined) { + closeEnhanceGraphSchemaDialog(); } openTextSchema(); }} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index c4d9ee2f7..c7b621748 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -102,7 +102,7 @@ export default function GraphEnhancementDialog({ openTextSchema={() => { setShowTextFromSchemaDialog({ triggeredFrom: 'enhancementtab', show: true }); }} - colseEnhanceGraphSchemaDialog={onClose} + closeEnhanceGraphSchemaDialog={onClose} settingView='headerView' />
            diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx index eae33e3d4..7a8b8e7b1 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -47,10 +47,10 @@ export default function useSourceInput( const submitHandler = useCallback( async (url: string) => { const defaultValues: CustomFileBase = { - processing: 0, + processingTotalTime: 0, status: 'New', - NodesCount: 0, - relationshipCount: 0, + nodesCount: 0, + relationshipsCount: 0, type: 'TEXT', model: model, fileSource: fileSource, @@ -122,7 +122,7 @@ export default function useSourceInput( ...defaultValues, }; if (isWikiQuery) { - baseValues.wiki_query = item.fileName; + baseValues.wikiQuery = item.fileName; } copiedFilesData.unshift(baseValues); } else { @@ -131,9 +131,9 @@ export default function useSourceInput( copiedFilesData.unshift({ ...tempFileData, status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, + nodesCount: defaultValues.nodesCount, + relationshipsCount: defaultValues.relationshipsCount, + processingTotalTime: defaultValues.processingTotalTime, model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index c43ee5020..02dbc0a2b 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -8,24 +8,24 @@ import { BannerType } from '@neo4j-ndl/react'; import Queue from './utils/Queue'; export interface CustomFileBase extends Partial { - processing: number | string; + processingTotalTime: number | string; status: string; - NodesCount: number; - relationshipCount: number; + nodesCount: number; + relationshipsCount: number; model: string; fileSource: string; - source_url?: string; - wiki_query?: string; + sourceUrl?: string; + wikiQuery?: string; gcsBucket?: string; gcsBucketFolder?: 
string; errorMessage?: string; - uploadprogess?: number; + uploadProgress?: number; processingStatus?: boolean; - google_project_id?: string; + googleProjectId?: string; language?: string; processingProgress?: number; - access_token?: string; - checked?: boolean; + accessToken?: string; + isChecked?: boolean; retryOptionStatus: boolean; retryOption: string; } @@ -45,12 +45,12 @@ export type UserCredentials = { database: string; } & { [key: string]: any }; -export interface SourceNode extends Omit { +export interface SourceNode extends Omit { fileName: string; fileSize: number; fileType: string; nodeCount?: number; - processingTime?: string; + processingTime: string; relationshipCount?: number; url?: string; awsAccessKeyId?: string; @@ -61,7 +61,7 @@ export interface SourceNode extends Omit { retry_condition?: string; } -export type ExtractParams = Pick & { +export type ExtractParams = Pick & { file?: File; aws_access_key_id?: string | null; aws_secret_access_key?: string | null; From ee710028704a50e4fa73ba517da412a6b12b69d9 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 22 Oct 2024 08:40:05 +0000 Subject: [PATCH 195/292] test --- frontend/src/components/ChatBot/ChunkInfo.tsx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index b008a30cf..68196fb3a 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -21,7 +21,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [_, setLoadingGraphView] = useState(false); + const [loadingGraphView, setLoadingGraphView] = useState(false); const handleChunkClick = (elementId: string, viewMode: string) => { 
handleGraphNodeClick( @@ -73,7 +73,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -100,7 +100,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -123,7 +123,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -146,7 +146,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -169,7 +169,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -196,7 +196,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} @@ -228,7 +228,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} >{'Graph'} From c115014cb53099d6130595fdeebcc836a49f9a74 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 22 Oct 2024 09:21:56 +0000 Subject: [PATCH 196/292] view graph --- frontend/src/components/ChatBot/ChunkInfo.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 68196fb3a..8db4aaa3e 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -75,7 +75,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { label='Graph view' className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -103,7 +103,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -126,7 +126,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -149,7 +149,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -172,7 +172,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -199,7 +199,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -231,7 +231,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} label='Graph view' onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            From c200b6105630c8666b9df8081826783467498f27 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 22 Oct 2024 09:43:13 +0000 Subject: [PATCH 197/292] nodes count and relationshipcount updation fix --- frontend/src/components/DataSources/AWS/S3Modal.tsx | 2 ++ frontend/src/components/DataSources/GCS/GCSModal.tsx | 2 ++ frontend/src/hooks/useSourceInput.tsx | 2 ++ frontend/src/hooks/useSse.tsx | 12 ++++++------ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index 69ac6e7bd..8e809dfff 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -82,6 +82,7 @@ const S3Modal: React.FC = ({ hideModal, open }) => { name: item.fileName, size: item.fileSize, sourceUrl: item.url, + uploadProgress:100, // total_pages: 'N/A', id: uuidv4(), ...defaultValues, @@ -98,6 +99,7 @@ const S3Modal: React.FC = ({ hideModal, open }) => { model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, + uploadProgress:100, }); } }); diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index 039934535..01b405cee 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -105,6 +105,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => id: uuidv4(), accessToken: codeResponse.access_token, ...defaultValues, + uploadProgress:100 }); } else { const tempFileData = copiedFilesData[filedataIndex]; @@ -119,6 +120,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, accessToken: codeResponse.access_token, + uploadProgress:100 }); 
} } diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx index 7a8b8e7b1..cbdc24b8f 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -118,6 +118,7 @@ export default function useSourceInput( source_url: item.url, id: uuidv4(), language: item.language, + uploadProgress:100, // total_pages: 1, ...defaultValues, }; @@ -137,6 +138,7 @@ export default function useSourceInput( model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, + uploadProgress:100, }); } } diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index dadf4d184..5612dc747 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx @@ -38,10 +38,10 @@ export default function useServerSideEvent( return { ...curfile, status: total_chunks === processed_chunk ? 'Completed' : status, - NodesCount: nodeCount, - relationshipCount: relationshipCount, + nodesCount: nodeCount, + relationshipsCount: relationshipCount, model: model, - processing: processingTime?.toFixed(2), + processingTotalTime: processingTime?.toFixed(2), processingProgress: Math.floor((processed_chunk / total_chunks) * 100), }; } @@ -57,10 +57,10 @@ export default function useServerSideEvent( return { ...curfile, status: status, - NodesCount: nodeCount, - relationshipCount: relationshipCount, + nodesCount: nodeCount, + relationshipsCount: relationshipCount, model: model, - processing: processingTime?.toFixed(2), + processingTotalTime: processingTime?.toFixed(2), }; } return curfile; From 340679b6db8599ccae2d50672c433f897fea210f Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:44:35 +0000 Subject: [PATCH 198/292] sourceUrl Fix --- frontend/src/hooks/useSourceInput.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/hooks/useSourceInput.tsx 
b/frontend/src/hooks/useSourceInput.tsx index cbdc24b8f..ee75a8cb4 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -115,7 +115,7 @@ export default function useSourceInput( const baseValues = { name: item.fileName, size: item.fileSize, - source_url: item.url, + sourceUrl: item.url, id: uuidv4(), language: item.language, uploadProgress:100, From fb35bda20dbeb84847887dd42cb435c6a3ba2009 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:16:26 +0000 Subject: [PATCH 199/292] empty string "" fix to keep the default values we should keep the value blank instead "" --- docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7761704c7..afc87b848 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -52,15 +52,15 @@ services: dockerfile: Dockerfile args: - VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000} - - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,youtube,wiki,s3} - - VITE_LLM_MODELS=${VITE_LLM_MODELS-diffbot,openai-gpt-3.5,openai-gpt-4o} - - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-""} + - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,wiki,s3} + - VITE_LLM_MODELS=${VITE_LLM_MODELS-} + - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-} - VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50} - VITE_CHUNK_SIZE=${VITE_CHUNK_SIZE-5242880} - VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE-5242880} - VITE_ENV=${VITE_ENV-DEV} - - VITE_CHAT_MODES=${VITE_CHAT_MODES-""} + - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} volumes: - ./frontend:/app From ed18462dcbcd8d2678d2dbf0fcfbe3e77af15501 Mon Sep 17 00:00:00 2001 
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 23 Oct 2024 07:07:35 +0000 Subject: [PATCH 200/292] prop changes --- frontend/src/components/Content.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index eae9caa77..4b10f4242 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -331,10 +331,10 @@ const Content: React.FC = ({ const apiRes = apiResponse?.data; return { ...curfile, - processing: apiRes?.processingTime?.toFixed(2), + processingProgress: apiRes?.processingTime?.toFixed(2), status: apiRes?.status, - NodesCount: apiRes?.nodeCount, - relationshipCount: apiRes?.relationshipCount, + nodesCount: apiRes?.nodeCount, + relationshipsCount: apiRes?.relationshipCount, model: apiRes?.model, }; } From 985993e2f6a773b46b2e5760f6778beacbff9bca Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 23 Oct 2024 07:10:18 +0000 Subject: [PATCH 201/292] props changes --- frontend/src/components/Content.tsx | 2 +- frontend/src/components/FileTable.tsx | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 4b10f4242..cd7ed14ea 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -571,7 +571,7 @@ const Content: React.FC = ({ ...f, status: 'Reprocess', processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.nodesCount, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, relationshipCount: isStartFromBegining ? 
0 : f.relationshipsCount, } : f; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 2032afd28..b3fbafd55 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -823,10 +823,10 @@ const FileTable = forwardRef((props, ref) => { return { ...curfile, status: status, - NodesCount: nodeCount, + nodesCount: nodeCount, relationshipCount: relationshipCount, model: model, - processing: processingTime?.toFixed(2), + processingTotalTime: processingTime?.toFixed(2), processingProgress: Math.floor((processed_chunk / total_chunks) * 100), }; } @@ -853,7 +853,7 @@ const FileTable = forwardRef((props, ref) => { return { ...curfile, status: status, - NodesCount: nodeCount, + nodesCount: nodeCount, relationshipCount: relationshipCount, processingProgress: Math.floor((processed_chunk / total_chunks) * 100), }; From 220bee77648f8c3e7361eeb88f6355c2cd6d8e5b Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:20:59 +0530 Subject: [PATCH 202/292] retry condition update for failed files (#820) --- backend/score.py | 2 +- backend/src/graphDB_dataAccess.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/score.py b/backend/score.py index 46c4ba137..48945fb7d 100644 --- a/backend/score.py +++ b/backend/score.py @@ -223,7 +223,7 @@ async def extract_knowledge_graph_from_file( except Exception as e: message=f"Failed To Process File:{file_name} or LLM Unable To Parse Content " error_message = str(e) - graphDb_data_Access.update_exception_db(file_name,error_message) + graphDb_data_Access.update_exception_db(file_name,error_message, retry_condition) gcs_file_cache = os.environ.get('GCS_FILE_CACHE') if source_type == 'local file': if gcs_file_cache == 'True': diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index e3b923f37..f5d1b228c 100644 --- 
a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -17,14 +17,19 @@ class graphDBdataAccess: def __init__(self, graph: Neo4jGraph): self.graph = graph - def update_exception_db(self, file_name, exp_msg): + def update_exception_db(self, file_name, exp_msg, retry_condition): try: job_status = "Failed" result = self.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] if bool(is_cancelled_status) == True: job_status = 'Cancelled' - self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg""", + if retry_condition is not None: + retry_condition = None + self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg, d.retry_condition = $retry_condition""", + {"fName":file_name, "status":job_status, "error_msg":exp_msg, "retry_condition":retry_condition}) + else : + self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg""", {"fName":file_name, "status":job_status, "error_msg":exp_msg}) except Exception as e: error_message = str(e) From 05085858fdefe76572de86cf85cede7192fd7744 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:32:05 +0530 Subject: [PATCH 203/292] Chat modes name changes (#815) * Props name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readble format In UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> --- backend/src/shared/constants.py | 10 ++--- .../src/components/ChatBot/ChatInfoModal.tsx | 28 +++++++----- .../src/components/ChatBot/ChatModeToggle.tsx | 22 +++++----- 
.../components/ChatBot/ChatModesSwitch.tsx | 3 +- frontend/src/components/ChatBot/Chatbot.tsx | 4 +- frontend/src/components/ChatBot/ChunkInfo.tsx | 43 ++++++++----------- .../components/ChatBot/CommunitiesInfo.tsx | 2 +- .../src/components/ChatBot/SourcesInfo.tsx | 7 ++- frontend/src/components/Content.tsx | 2 +- .../SelectedJobList.tsx | 13 +++--- frontend/src/context/UsersFiles.tsx | 2 +- frontend/src/utils/Constants.ts | 19 ++++++-- frontend/src/utils/Utils.ts | 10 ++--- 13 files changed, 89 insertions(+), 76 deletions(-) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index cde354f16..b58fd3a67 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -560,12 +560,12 @@ CHAT_VECTOR_MODE = "vector" CHAT_FULLTEXT_MODE = "fulltext" -CHAT_ENTITY_VECTOR_MODE = "entity search+vector" -CHAT_VECTOR_GRAPH_MODE = "graph+vector" -CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph+vector+fulltext" -CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global search+vector+fulltext" +CHAT_ENTITY_VECTOR_MODE = "entity_vector" +CHAT_VECTOR_GRAPH_MODE = "graph_vector" +CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph_vector_fulltext" +CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global_vector" CHAT_GRAPH_MODE = "graph" -CHAT_DEFAULT_MODE = "graph+vector+fulltext" +CHAT_DEFAULT_MODE = "graph_vector_fulltext" CHAT_MODE_CONFIG_MAP= { CHAT_VECTOR_MODE : { diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 757503fb1..4ffdd9acd 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -24,7 +24,7 @@ import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './CommunitiesInfo'; -import { chatModeLables, supportedLLmsForRagas } from '../../utils/Constants'; +import { chatModeLables, chatModeReadableLables, supportedLLmsForRagas } from 
'../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; @@ -71,14 +71,20 @@ const ChatInfoModal: React.FC = ({ const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const [activeTab, setActiveTab] = useState( - error?.length ? 10 : mode === chatModeLables.global_vector ? 7 : mode === chatModeLables.graph ? 4 : 3 + error?.length + ? 10 + : mode === chatModeLables['global search+vector+fulltext'] + ? 7 + : mode === chatModeLables.graph + ? 4 + : 3 ); const { userCredentials } = useCredentials(); const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); - const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)) + const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)); const [multiModeError, setMultiModeError] = useState(''); const actions: CypherCodeBlockProps['actions'] = useMemo( @@ -201,7 +207,7 @@ const ChatInfoModal: React.FC = ({ } } } else { - setShowMultiModeMetrics(true) + setShowMultiModeMetrics(true); toggleMetricsLoading(); const contextarray = Object.values(activeChatmodes).map((r) => { return r.metric_contexts; @@ -255,7 +261,7 @@ const ChatInfoModal: React.FC = ({ To generate this response, the process took {response_time} seconds, utilizing {total_tokens} tokens with the model{' '} {model} in{' '} - {mode !== 'vector' ? mode.replace(/\+/g, ' & ') : mode} mode. + {chatModeReadableLables[mode] !== 'vector' ? chatModeReadableLables[mode].replace(/\+/g, ' & ') : chatModeReadableLables[mode]} mode.
            @@ -263,16 +269,16 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode === chatModeLables.global_vector ? ( + {mode === chatModeLables['global search+vector+fulltext'] ? ( Communities ) : ( <> {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} - {mode === chatModeLables.graph_vector || + {mode === chatModeLables['graph+vector'] || mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables['graph+vector+fulltext'] || + mode === chatModeLables['entity search+vector'] ? ( Top Entities used ) : ( <> @@ -282,7 +288,7 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === chatModeLables.entity_vector && communities.length ? ( + {mode === chatModeLables['entity search+vector'] && communities.length ? ( Communities ) : ( <> @@ -387,7 +393,7 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> - {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? ( + {mode === chatModeLables['entity search+vector'] || mode === chatModeLables['global search+vector+fulltext'] ? 
( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index a892ca01d..52df01a4a 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -2,7 +2,7 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; -import { chatModeLables, chatModes as AvailableModes } from '../../utils/Constants'; +import { chatModeLables, chatModes as AvailableModes, chatModeReadableLables } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; @@ -30,16 +30,16 @@ export default function ChatModeToggle({ if ( chatModes.includes(chatModeLables.graph) || chatModes.includes(chatModeLables.fulltext) || - chatModes.includes(chatModeLables.graph_vector_fulltext) + chatModes.includes(chatModeLables['global search+vector+fulltext']) ) { setchatModes((prev) => prev.filter( - (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables.graph_vector_fulltext].includes(m) + (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables['graph+vector+fulltext']].includes(m) ) ); } - if (!chatModes.includes(chatModeLables.vector)) { - setchatModes([chatModeLables.vector]); + if (!(chatModes.includes(chatModeLables.vector) || chatModes.includes(chatModeLables['graph+vector']))) { + setchatModes([chatModeLables['graph+vector']]); } } }, [selectedRows.length, chatModes.length]); @@ -47,16 +47,16 @@ export default function ChatModeToggle({ const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? 
AvailableModes - : AvailableModes?.filter((m) => !m.mode.includes(chatModeLables.global_vector)); + : AvailableModes?.filter((m) => !m.mode.includes(chatModeLables['global search+vector+fulltext'])); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean( - selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables['graph+vector']) ); const handleModeChange = () => { if (isDisabled) { - setchatModes([chatModeLables.graph_vector]); + setchatModes([chatModeLables['graph+vector']]); } else if (chatModes.includes(m.mode)) { setchatModes((prev) => prev.filter((i) => i != m.mode)); } else { @@ -68,7 +68,9 @@ export default function ChatModeToggle({ title: (
            - {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} + {chatModeReadableLables[m.mode].includes('+') + ? capitalizeWithPlus(chatModeReadableLables[m.mode]) + : capitalize(chatModeReadableLables[m.mode])}
            {m.description} @@ -97,7 +99,7 @@ export default function ChatModeToggle({ useEffect(() => { if (!selectedRows.length && !chatModes.length) { - setchatModes([chatModeLables.graph_vector_fulltext]); + setchatModes([]); } }, [selectedRows.length, chatModes.length]); return ( diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index a958b1a06..db91e3372 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -2,6 +2,7 @@ import { Flex, IconButton } from '@neo4j-ndl/react'; import { ChevronLeftIconSolid, ChevronRightIconSolid } from '@neo4j-ndl/react/icons'; import TipWrapper from '../UI/TipWrapper'; import { capitalize, capitalizeWithPlus } from '../../utils/Utils'; +import { chatModeReadableLables } from '../../utils/Constants'; export default function ChatModesSwitch({ switchToOtherMode, @@ -16,7 +17,7 @@ export default function ChatModesSwitch({ currentMode: string; isFullScreen: boolean; }) { - const chatmodetoshow = currentMode.includes('+') ? capitalizeWithPlus(currentMode) : capitalize(currentMode); + const chatmodetoshow =chatModeReadableLables[currentMode].includes('+') ? 
capitalizeWithPlus(chatModeReadableLables[currentMode]) : capitalize(chatModeReadableLables[currentMode]); return ( = (props) => { const [responseTime, setResponseTime] = useState(0); const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); - const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); + const [chatsMode, setChatsMode] = useState(chatModeLables['graph+vector+fulltext']); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); const [entitiesModal, setEntitiesModal] = useState([]); @@ -583,7 +583,7 @@ const Chatbot: FC = (props) => { }} onClose={() => setShowInfoModal(false)} open={showInfoModal} - size={activeChat?.currentMode === chatModeLables.entity_vector ? 'large' : 'medium'} + size={activeChat?.currentMode === chatModeLables['entity search+vector'] ? 'large' : 'medium'} >
            = ({ loading, chunks, mode }) => {
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && chunk.score && ( Similarity Score: {chunk?.score} @@ -71,11 +71,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -92,14 +91,13 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
            handleChunkClick(chunk.element_id, 'Chunk')} @@ -115,18 +113,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -138,18 +135,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -161,18 +157,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName}
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -188,18 +183,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.url}
            - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            @@ -227,11 +221,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
            handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
            diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 1a769a1f3..bc8e5e8d3 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -47,7 +47,7 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = onClick={() => handleCommunityClick(community.element_id, 'chatInfoView')} >{`ID : ${community.id}`} - {mode === chatModeLables.global_vector && community.score && ( + {mode === chatModeLables['global search+vector+fulltext'] && community.score && ( Score : {community.score} diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index 35d76aefb..a934913d9 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -16,10 +16,9 @@ const filterUniqueChunks = (chunks: Chunk[]) => { const sourceCheck = `${chunk.fileName}-${chunk.fileSource}`; if (chunkSource.has(sourceCheck)) { return false; - } - chunkSource.add(sourceCheck); - return true; - + } + chunkSource.add(sourceCheck); + return true; }); }; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index cd7ed14ea..c5dceaf36 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -552,7 +552,7 @@ const Content: React.FC = ({ setSelectedNodes([]); setSelectedRels([]); setClearHistoryData(true); - setchatModes([chatModeLables.graph_vector_fulltext]); + setchatModes([chatModeLables['graph+vector+fulltext']]); }; const retryHandler = async (filename: string, retryoption: string) => { diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 1cb3c7310..348c97af2 100644 --- 
a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,18 +11,19 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive - ? postProcessingTasks.includes('enable_communities') - ? postProcessingTasks - : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + (isGdsActive + ? postProcessingTasks.includes('enable_communities') + ? postProcessingTasks + : postProcessingTasks.filter((s) => s != 'enable_communities') + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( - {ongoingPostProcessingTasks.map((task) => { + {ongoingPostProcessingTasks.map((task, idx) => { return ( {task diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 14f5a0245..935f75f10 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -29,7 +29,7 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); - const [chatModes, setchatModes] = useState([chatModeLables.graph_vector_fulltext]); + const [chatModes, setchatModes] = useState([chatModeLables['graph+vector+fulltext']]); const [isSchema, setIsSchema] = useState(false); const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({ triggeredFrom: '', diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8ab428999..0043dc241 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -47,6 +47,17 @@ export const supportedLLmsForRagas = [ 'bedrock_claude_3_5_sonnet', ]; export const 
chatModeLables = { + vector: 'vector', + graph: 'graph', + 'graph+vector': 'graph_vector', + fulltext: 'fulltext', + 'graph+vector+fulltext': 'graph_vector_fulltext', + 'entity search+vector': 'entity_vector', + unavailableChatMode: 'Chat mode is unavailable when files are selected', + selected: 'Selected', + 'global search+vector+fulltext': 'global_vector', +}; +export const chatModeReadableLables: Record = { vector: 'vector', graph: 'graph', graph_vector: 'graph+vector', @@ -73,7 +84,7 @@ export const chatModes = description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', }, { - mode: chatModeLables.graph_vector, + mode: chatModeLables['graph+vector'], description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', }, { @@ -81,15 +92,15 @@ export const chatModes = description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', }, { - mode: chatModeLables.graph_vector_fulltext, + mode: chatModeLables['graph+vector+fulltext'], description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', }, { - mode: chatModeLables.entity_vector, + mode: chatModeLables['entity search+vector'], description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', }, { - mode: chatModeLables.global_vector, + mode: chatModeLables['global search+vector+fulltext'], description: 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', }, diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 226fd03f9..6945c17ca 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -417,15 +417,15 @@ export const getDescriptionForChatMode = (mode: string): string => { return 'Utilizes vector indexing on text chunks to enable semantic similarity search.'; case chatModeLables.graph: return 'Leverages text-to-cypher translation to query a 
database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.'; - case chatModeLables.graph_vector: + case chatModeLables['graph+vector']: return 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.'; case chatModeLables.fulltext: return 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.'; - case chatModeLables.graph_vector_fulltext: + case chatModeLables['graph+vector+fulltext']: return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.'; - case chatModeLables.entity_vector: + case chatModeLables['entity search+vector']: return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.'; - case chatModeLables.global_vector: + case chatModeLables['global search+vector+fulltext']: return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.'; default: return 'Chat mode description not available'; // Fallback description @@ -510,7 +510,7 @@ export function downloadClickHandler( } export function getNodes(nodesData: Array, mode: string) { return nodesData.map((n) => { - if (!n.labels.length && mode === chatModeLables.entity_vector) { + if (!n.labels.length && mode === chatModeLables['entity search+vector']) { return { ...n, labels: ['Entity'], From 9dba36139bca079dbf52b1ce1a589b77882c9022 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Wed, 23 Oct 2024 16:59:47 +0530 Subject: [PATCH 204/292] Youtube transcript fix with proxy (#822) --- backend/example.env | 1 + backend/src/document_sources/youtube.py | 25 
++++++++++++++----------- backend/src/main.py | 5 +++-- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/backend/example.env b/backend/example.env index 75b817220..5d366f21c 100644 --- a/backend/example.env +++ b/backend/example.env @@ -40,4 +40,5 @@ LLM_MODEL_CONFIG_anthropic_claude_3_5_sonnet="model_name,anthropic_api_key" LLM_MODEL_CONFIG_fireworks_llama_v3_70b="model_name,fireworks_api_key" LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet="model_name,aws_access_key_id,aws_secret__access_key,region_name" LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url" +YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port" diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index fce62774d..e30de301e 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -17,22 +17,25 @@ def get_youtube_transcript(youtube_id): try: #transcript = YouTubeTranscriptApi.get_transcript(youtube_id) - transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) - transcript = transcript_list.find_transcript(["en"]) - transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + # transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) + # transcript = transcript_list.find_transcript(["en"]) + # transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY") + proxies = { 'https': proxy } + transcript_pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies = proxies) return transcript_pieces except Exception as e: message = f"Youtube transcript is not available for youtube Id: {youtube_id}" raise Exception(message) -def get_youtube_combined_transcript(youtube_id): - try: - transcript_dict = get_youtube_transcript(youtube_id) - transcript = YouTubeTranscriptApi.get_transcript(youtube_id) - return transcript - except Exception as e: - message = f"Youtube transcript is not available for youtube Id: {youtube_id}" - 
raise Exception(message) +# def get_youtube_combined_transcript(youtube_id): +# try: +# transcript_dict = get_youtube_transcript(youtube_id) +# transcript = YouTubeTranscriptApi.get_transcript(youtube_id) +# return transcript +# except Exception as e: +# message = f"Youtube transcript is not available for youtube Id: {youtube_id}" +# raise Exception(message) def get_youtube_combined_transcript(youtube_id): try: diff --git a/backend/src/main.py b/backend/src/main.py index 425a24451..3691b7465 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -155,7 +155,7 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): # logging.warning("credntial file path not exist") video_id = parse_qs(urlparse(youtube_url).query).get('v') - print(f'Video Id Youtube: {video_id}') + # google_api_client = GoogleApiClient(service_account_path=Path(file_path)) # youtube_loader_channel = GoogleApiYoutubeLoader( # google_api_client=google_api_client, @@ -165,6 +165,7 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): # page_content = youtube_transcript[0].page_content obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] + #obj_source_node.file_name = YouTube(youtube_url).title transcript= get_youtube_combined_transcript(match.group(1)) print(transcript) if transcript==None or len(transcript)==0: @@ -435,7 +436,7 @@ async def processing_source(uri, userName, password, database, model, file_name, else: logging.info('File does not process because it\'s already in Processing status') else: - error_message = "Unable to get the status of docuemnt node." + error_message = "Unable to get the status of document node." 
logging.error(error_message) raise Exception(error_message) From 6839e522f5494578453ee5dad841519dd4ad5f6f Mon Sep 17 00:00:00 2001 From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:31:40 +0000 Subject: [PATCH 205/292] update script for async func --- backend/test_integrationqa.py | 80 ++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 0281c91f8..548e9706f 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -1,15 +1,19 @@ import json +import asyncio import os import shutil import logging import pandas as pd from datetime import datetime as dt from dotenv import load_dotenv -from score import * +# from score import * from src.main import * from src.QA_integration import QA_RAG from langserve import add_routes - +from src.ragas_eval import get_ragas_metrics +from datasets import Dataset +from ragas import evaluate +from ragas.metrics import answer_relevancy, context_utilization, faithfulness # Load environment variables if needed load_dotenv() import os @@ -57,11 +61,11 @@ def test_graph_from_file_local(model_name): create_source_node_local(graph, model_name, file_name) merged_file_path = os.path.join(MERGED_DIR, file_name) - local_file_result = extract_graph_from_file_local_file( - URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None - ) + local_file_result = asyncio.run(extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None)) logging.info("Local file processing complete") print(local_file_result) + return local_file_result # try: # assert local_file_result['status'] == 'Completed' @@ -72,19 +76,19 @@ def test_graph_from_file_local(model_name): # print("Fail: ", e) # Delete the file after processing - delete_extracted_files(merged_file_path) +# 
delete_extracted_files(merged_file_path) - return local_file_result + #return local_file_result def test_graph_from_wikipedia(model_name): # try: """Test graph creation from a Wikipedia page.""" - wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + wiki_query = 'https://en.wikipedia.org/wiki/Berkshire_Hathaway' source_type = 'Wikipedia' - file_name = "Ram_Mandir" + file_name = "Berkshire_Hathaway" create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en',file_name, '', '',None) + wiki_result = asyncio.run(extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en',file_name, '', '',None)) logging.info("Wikipedia test done") print(wiki_result) try: @@ -107,7 +111,7 @@ def test_graph_website(model_name): file_name = [] create_source_node_graph_web_url(graph, model_name, source_url, source_type) - weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None) + weburl_result = asyncio.run(extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None)) logging.info("WebUrl test done") print(weburl_result) @@ -164,9 +168,9 @@ def disconected_nodes(): print(nodes_list[0]["e"]["elementId"]) status = "False" if total_nodes['total']>0: - status = "True" + status = "get_unconnected_nodes_list.. 
records loaded successfully" else: - status = "False" + status = "get_unconnected_nodes_list ..records not loaded" return nodes_list[0]["e"]["elementId"], status #Test Delete delete_disconnected_nodes list @@ -177,9 +181,9 @@ def delete_disconected_nodes(lst_element_id): result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) print(f'delete disconnect api result {result}') if not result: - return "True" + return "delete_unconnected_nodes..Succesfully deleted first index of disconnected nodes" else: - return "False" + return "delete_unconnected_nodes..Unable to delete Nodes" #Test Get Duplicate_nodes def get_duplicate_nodes(): @@ -187,28 +191,34 @@ def get_duplicate_nodes(): graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() if total_nodes['total']>0: - return "True" + return "Data successfully loaded" else: - return "False" + return "Unable to load data" #Test populate_graph_schema -def test_populate_graph_schema_from_text(model): - result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True) - print(result_schema) - return result_schema - +def test_populate_graph_schema_from_text(model_name): + schema_text =('Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington.The company originally started as an online marketplace for books but gradually expanded its offerings to include a wide range of product categories. 
This diversification led to it being referred.') + #result_schema='' + try: + result_schema = populate_graph_schema_from_text(schema_text, model_name, True) + print(result_schema) + return result_schema + except Exception as e: + print("Failed to get schema from text", e) + return e def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.5-pro','gemini 1.5 Flash','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','ollama','groq_llama3_70b','anthropic_claude_3_5_sonnet','bedrock_claude_3_5_sonnet','fireworks_llama_v3p2_90b'] + + models = ['openai_gpt_3_5','openai_gpt_4o','openai_gpt_4o_mini','azure-ai-gpt-35','azure-ai-gpt-4o','gemini_1_5_pro','gemini_1_5_flash','anthropic-claude-3-5-sonnet','bedrock-claude-3-5-sonnet','groq-llama3-70b','fireworks-llama-v3-70b'] for model_name in models: try: final_list.append(test_graph_from_file_local(model_name)) final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_website(model_name)) + final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_from_youtube_video(model_name)) final_list.append(test_chatbot_qna(model_name)) final_list.append(test_chatbot_qna(model_name, mode='vector')) @@ -219,23 +229,25 @@ def run_tests(): except Exception as e: error_list.append((model_name, str(e))) - # #Compare and log diffrences in graph results - # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - # test_populate_graph_schema_from_text('openai-gpt-4o') -# dis_elementid, dis_status = disconected_nodes() -# lst_element_id = [dis_elementid] -# delt = delete_disconected_nodes(lst_element_id) + +# test_populate_graph_schema_from_text('openai-gpt-4o') +#delete diconnected nodes + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delt = 
delete_disconected_nodes(lst_element_id) # dup = get_duplicate_nodes() print(final_list) - # schma = test_populate_graph_schema_from_text(model) + schma = test_populate_graph_schema_from_text(model_name) # Save final results to CSV df = pd.DataFrame(final_list) print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') -# df['disconnected_nodes']=dis_status +#diconnected nodes + df['disconnected_nodes']=dis_status # df['get_duplicate_nodes']=dup -# df['delete_disconected_nodes']=delt - # df['test_populate_graph_schema_from_text'] = schma + + df['delete_disconected_nodes']=delt + df['test_populate_graph_schema_from_text'] = schma df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) # Save error details to CSV From a5d29fa728121b5c358d4c043535b80aae218fdf Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Thu, 24 Oct 2024 11:59:54 +0530 Subject: [PATCH 206/292] ragas changes for graph retrieval mode. context added in api output (#825) --- backend/example.env | 1 + backend/src/QA_integration.py | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/backend/example.env b/backend/example.env index 5d366f21c..7fa3cb480 100644 --- a/backend/example.env +++ b/backend/example.env @@ -2,6 +2,7 @@ OPENAI_API_KEY = "" DIFFBOT_API_KEY = "" GROQ_API_KEY = "" EMBEDDING_MODEL = "all-MiniLM-L6-v2" +RAGAS_EMBEDDING_MODEL = "openai" IS_EMBEDDING = "true" KNN_MIN_SCORE = "0.94" # Enable Gemini (default is False) | Can be False or True diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 468069531..d56ee5ccb 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -434,10 +434,6 @@ def process_chat_response(messages, history, question, model, graph, document_na result = {"sources": list(), "nodedetails": list(), "entities": list()} total_tokens = 0 formatted_docs = "" - - # question = transformed_question if transformed_question else question - # metrics = 
get_ragas_metrics(question,formatted_docs,content) - # print(metrics) ai_response = AIMessage(content=content) messages.append(ai_response) @@ -580,7 +576,7 @@ def process_graph_response(model, graph, question, messages, history): summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, qa_llm)) summarization_thread.start() logging.info("Summarization thread started.") - + metric_details = {"question":question,"contexts":graph_response.get("context", ""),"answer":ai_response_content} result = { "session_id": "", "message": ai_response_content, @@ -589,7 +585,8 @@ def process_graph_response(model, graph, question, messages, history): "cypher_query": graph_response.get("cypher_query", ""), "context": graph_response.get("context", ""), "mode": "graph", - "response_time": 0 + "response_time": 0, + "metric_details": metric_details, }, "user": "chatbot" } From cb59a2a1618cdb0e134c59e22c98800f85804819 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Thu, 24 Oct 2024 09:19:00 +0000 Subject: [PATCH 207/292] Remove extract latency from logging and add LIMIT in duplicate nodes --- backend/score.py | 13 +++++++------ backend/src/graphDB_dataAccess.py | 5 +++-- backend/src/main.py | 17 +++++++++-------- .../Deduplication/index.tsx | 8 +++++--- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/backend/score.py b/backend/score.py index 48945fb7d..95caa8b0f 100644 --- a/backend/score.py +++ b/backend/score.py @@ -190,22 +190,22 @@ async def extract_knowledge_graph_from_file( if source_type == 'local file': merged_file_path = os.path.join(MERGED_DIR,file_name) logging.info(f'File path:{merged_file_path}') - result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, 
model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 's3 bucket' and source_url: - result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'web-url': - result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'youtube' and source_url: - result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'Wikipedia' and wiki_query: - result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'gcs bucket' and gcs_bucket_name: - result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, 
allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) else: return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time @@ -218,6 +218,7 @@ async def extract_knowledge_graph_from_file( result['logging_time'] = formatted_time(datetime.now(timezone.utc)) result['elapsed_api_time'] = f'{extract_api_time:.2f}' logger.log_struct(result, "INFO") + result.update(uri_latency) logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) except Exception as e: diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index f5d1b228c..189d581a6 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -390,13 +390,14 @@ def get_duplicate_nodes_list(self): [s in similar | s {.id, .description, labels:labels(s), elementId: elementId(s)}] as similar, collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections ORDER BY e.id ASC + LIMIT 100 """ - return_query_duplicate_nodes_total = "RETURN COUNT(DISTINCT(n)) as total" + total_duplicate_nodes = "RETURN COUNT(DISTINCT(n)) as total" param = {"duplicate_score_value": score_value, "duplicate_text_distance" : text_distance} nodes_list = self.execute_query(query_duplicate_nodes.format(return_statement=return_query_duplicate_nodes),param=param) - total_nodes = self.execute_query(query_duplicate_nodes.format(return_statement=return_query_duplicate_nodes_total),param=param) + total_nodes = self.execute_query(query_duplicate_nodes.format(return_statement=total_duplicate_nodes),param=param) return nodes_list, total_nodes[0] def 
merge_duplicate_nodes(self,duplicate_nodes_list): diff --git a/backend/src/main.py b/backend/src/main.py index 3691b7465..05f83f073 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -424,15 +424,16 @@ async def processing_source(uri, userName, password, database, model, file_name, uri_latency["Per_entity_latency"] = 'N/A' else: uri_latency["Per_entity_latency"] = f'{int(processing_source_func)/node_count}/s' - uri_latency["fileName"] = file_name - uri_latency["nodeCount"] = node_count - uri_latency["relationshipCount"] = rel_count - uri_latency["total_processing_time"] = round(processed_time.total_seconds(),2) - uri_latency["status"] = job_status - uri_latency["model"] = model - uri_latency["success_count"] = 1 + response = {} + response["fileName"] = file_name + response["nodeCount"] = node_count + response["relationshipCount"] = rel_count + response["total_processing_time"] = round(processed_time.total_seconds(),2) + response["status"] = job_status + response["model"] = model + response["success_count"] = 1 - return uri_latency + return uri_latency, response else: logging.info('File does not process because it\'s already in Processing status') else: diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 420fbe590..1e6b0c213 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -45,7 +45,7 @@ export default function DeduplicationTab() { const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - + const [nodesCount, setNodesCount] = useState(0); const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -56,6 +56,8 @@ export default function DeduplicationTab() { } if 
(duplicateNodesData.data.data.length) { setDuplicateNodes(duplicateNodesData.data.data); + //@ts-ignore + setNodesCount(duplicateNodesData.data.message.total) } else { setDuplicateNodes([]); } @@ -272,9 +274,9 @@ export default function DeduplicationTab() { accuracy and clarity of your knowledge graph. - {duplicateNodes.length > 0 && ( + {nodesCount > 0 && ( - Total Duplicate Nodes: {duplicateNodes.length} + Total Duplicate Nodes: {nodesCount} )} From 93d7f3baa42e3ba804264e4044efb8c6cb94414f Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Thu, 24 Oct 2024 15:28:46 +0530 Subject: [PATCH 208/292] Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api --- README.md | 2 +- backend/score.py | 2 +- docs/backend/backend_docs.adoc | 228 ++++++++++++++++++++++++++++++--- 3 files changed, 212 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 01342a8a4..e820f04a9 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ Allow unauthenticated request : Yes | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | - +| RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | diff --git a/backend/score.py b/backend/score.py index 95caa8b0f..f7b8c4082 100644 --- a/backend/score.py +++ b/backend/score.py @@ -749,7 +749,7 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() @app.post("/drop_create_vector_index") -async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): +async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): try: payload_json_obj = {'api_name':'drop_create_vector_index', 'db_url':uri, 'userName':userName, 'database':database, 'isVectorIndexExist':isVectorIndexExist, 'logging_time': formatted_time(datetime.now(timezone.utc))} diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 91b39260c..3924e2a17 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -374,6 +374,8 @@ The API responsible for a chatbot system designed to leverage multiple AI models * `model`= LLM model * `question`= User query for the chatbot * `session_id`= Session ID used to maintain the history of chats during the user's connection +* `document_names`= File name/names for which user wants to question +* `mode`= Retrieval mode used for answer generation **Response :** [source,json,indent=0] @@ -381,26 +383,51 @@ The API responsible for a chatbot system designed to leverage multiple AI models { "status": "Success", "data": { - "session_id": "0901", - "message": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing process where 
connective tissue replaces normal parenchymal tissue." + "session_id": "d345d9b6-f8f1-40c0-b585-280c998b9752", + "message": "Google DeepMind is a British-American artificial intelligence research laboratory that focuses on developing advanced AI technologies. It is known for creating neural network models capable of playing video games and board games, such as the AlphaGo program, which famously defeated a world champion Go player. DeepMind also works on general-purpose AI systems, like AlphaZero, which can play multiple games at a high level. Additionally, the company has been involved in AI safety work and has established an ethics board to guide its AI development.", "info": { "sources": [ - { - "source_name": "https://en.wikipedia.org/wiki/Fibrosis", - "page_numbers": [], - "start_time": [] - } + "https://en.wikipedia.org/wiki/Google_DeepMind" ], - "model": "gpt-4o", - "chunkids": [ - "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", - "4cc02ee8419706c8decdf71ab0d3896aad5c7dca", - "266ce95311bb1921791b4f1cd29a48d433027139", - "11e19513247e1e396475728fa6a197695045b248", - "8bafa01b6d851f70822bcb86863e485e1785a64c" - ], - "total_tokens": 2213, - "response_time": 10.17 + "model": "gpt-4o-2024-08-06", + "nodedetails": { + "chunkdetails": [ + { + "id": "80b30de81d6c1dca5973587b538887de8f47237d", + "score": 0.8614 + }, + { + "id": "419cee50cef9aba9c2204f2a7a17bfe84fb1df20", + "score": 0.8306 + }, + { + "id": "9daf80b62be569ace896d1945c7b25a7fcce9027", + "score": 0.827 + }, + { + "id": "36a1441279bf19feb5498d9a0ecfcebefac1d1f5", + "score": 0.8232 + }, + { + "id": "31a8234c4ab3c5fff8febb4264a5a041e1f21c36", + "score": 0.8181 + } + ], + "entitydetails": [], + "communitydetails": [] + }, + "total_tokens": 1713, + "response_time": 7.22, + "mode": "vector", + "entities": { + "entityids": [], + "relationshipids": [] + }, + "metric_details": { + "question": "what does google deepmind do?", + "contexts": "Document start\nThis Document belongs to the source 
https://en.wikipedia.org/wiki/Google_DeepMind\nContent: developed by Google DeepMind. == References == == Further reading == == External links == Official website\n----\nDeepMind Technologies Limited, also known by its trade name Google DeepMind, is a British-American artificial intelligence research laboratory which serves as a subsidiary of Google. Founded in the UK in 2010, it was acquired by Google in 2014 and merged with Google AIs Google Brain division to become Google DeepMind in April 2023.\nDocument end\n", + "answer": "Google DeepMind is a British-American artificial intelligence research laboratory that focuses on developing advanced AI technologies. It is known for creating neural network models capable of playing video games and board games, such as the AlphaGo program, which famously defeated a world champion Go player. DeepMind also works on general-purpose AI systems, like AlphaZero, which can play multiple games at a high level. Additionally, the company has been involved in AI safety work and has established an ethics board to guide its AI development." + } }, "user": "chatbot" } @@ -699,7 +726,7 @@ The API is used to delete unconnected entities from database. .... -== Decisions +==== Decisions * Process only 1st page of Wikipedia * Split document content into chunks of size 200 and overlap of 20 @@ -709,3 +736,168 @@ The API is used to delete unconnected entities from database. ** Embedding model ** minimum score for KNN graph ** Uploaded file storage location (GCS bucket or container) + + +=== Get duplicate nodes +---- +POST /get_duplicate_nodes +---- + +The API is used to fetch duplicate entities from database. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, + + +**Response :** +[source,json,indent=0] +.... 
+{ + "status": "Success", + "data": [ + { + "e": { + "id": "13 September 2024", + "elementId": "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:14007", + "communities": [ + 2969, + 383, + 81 + ], + "labels": [ + "__Entity__", + "Date" + ], + "embedding": null + }, + "similar": [ + { + "id": "20 September 2024", + "elementId": "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:14153", + "description": null, + "labels": [ + "__Entity__", + "Date" + ] + } + ], + "documents": [], + "chunkConnections": 0 + } + ], + "message": { + "total": 1 + } +} +.... + + +=== Merge duplicate nodes +---- +POST /merge_duplicate_nodes +---- + +The API is used to merge duplicate entities from database selected by user. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `duplicate_nodes_list`= list of selected entities to merge with their similar entities. + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": [ + { + "totalMerged": 2 + } + ], + "message": "Duplicate entities merged successfully" +} +.... +=== Drop and create vector index +---- +POST /drop_create_vector_index +---- + +The API is used to drop and create the vector index when the vector index dimensions are different. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `isVectorIndexExist`= True or False based on whether the vector index exists in the database, + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "message": "Drop and Re-Create vector index succesfully" +} +.... + +=== Reprocess document +---- +POST /retry_processing +---- + +The API is used to reprocess document. 
+ +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `file_name`= The name of the file which needs to be reprocessed, +* `retry_condition`= reprocessing condition whether to just start from beginning or delete created entities and reprocess, + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "message": "Status set to Reprocess for filename : Alphabet.pdf" +} +.... + +=== Evaluate response +---- +POST /metric +---- + +The API is responsible for evaluating chatbot responses on the basis of different metrics such as faithfulness and answer relevancy. This utilises the RAGAS library to calculate these metrics. + +**API Parameters :** + +* `question`= User query for the chatbot +* `context`= context retrieved by retrieval mode used for answer generation +* `answer`= answer generated by chatbot +* `model`= LLM model +* `mode`= Retrieval mode used for answer generation + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "graph+vector+fulltext": { + "faithfulness": 1.0, + "answer_relevancy": 0.9699 + } + } +} +.... \ No newline at end of file From 0d2882c912a32748647e823b0a5ce313da336bde Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:02:50 +0530 Subject: [PATCH 209/292] Update README.md --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index e820f04a9..8d05f10fa 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,32 @@ Allow unauthenticated request : Yes | RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | +## For local llms (Ollama) +1. Pull the docker image of ollama +```bash +docker pull ollama/ollama +``` +2. 
Run the ollama docker image +```bash +docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama +``` +3. Execute any llm model, e.g. llama3 +```bash +docker exec -it ollama ollama run llama3 +``` +4. Configure env variable in docker compose or backend environment. +```env +LLM_MODEL_CONFIG_ollama_ +#example +LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-llama3, +http://host.docker.internal:11434} +``` +5. Configure the backend API url +```env +VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} +``` +6. Open the application in browser and select the ollama model for the extraction. +7. Enjoy Graph Building. ## Usage From 6a6dc051c25e819a70f3586dc0d18b79cf4bb242 Mon Sep 17 00:00:00 2001 From: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:07:22 +0530 Subject: [PATCH 210/292] updated api structire in docs (#827) --- docs/backend/backend_docs.adoc | 220 ++++++++++++++++++++++----------- 1 file changed, 149 insertions(+), 71 deletions(-) diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 3924e2a17..8016fd481 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -22,9 +22,15 @@ Neo4j database connection on frontend is done with this API. [source,json,indent=0] ---- { - "status":"Success", - "message":"Connection Successful" -} + "status": "Success", + "data": { + "db_vector_dimension": 384, + "application_dimension": 384, + "message": "Connection Successful", + "gds_status": true, + "write_access": true, + "elapsed_api_time": "5.48" + } ---- @@ -361,10 +367,12 @@ The API responsible for a chatbot system designed to leverage multiple AI models **Components :** -** Embedding Models - Includes OpenAI Embeddings, VertexAI Embeddings, and SentenceTransformer Embeddings to support vector-based query operations. 
-** AI Models - OpenAI GPT 3.5, GPT 4o, Gemini Pro, Gemini 1.5 Pro and Groq llama3 can be configured for the chatbot backend to generate responses and process natural language. +** Embedding Models - Includes OpenAI Embeddings, VertexAI Embeddings, and SentenceTransformer Embeddings (default) to support vector-based query operations. +** AI Models - OpenAI GPT 3.5, GPT 4o, GPT 4o mini and gemini_1.5_flash can be configured for the chatbot backend to generate responses and process natural language. ** Graph Database (Neo4jGraph) - Manages interactions with the Neo4j database, retrieving, and storing conversation histories. ** Response Generation - Utilizes Vector Embeddings from the Neo4j database, chat history, and the knowledge base of the LLM used. +** Chat Modes - Vector, Graph, Vector + Graph, Fulltext, Vector + Graph + Fulltext, Entity Search + Vector, Global search Vector + **API Parameters :** @@ -373,9 +381,9 @@ The API responsible for a chatbot system designed to leverage multiple AI models * `password`= Neo4j database password * `model`= LLM model * `question`= User query for the chatbot -* `session_id`= Session ID used to maintain the history of chats during the user's connection -* `document_names`= File name/names for which user wants to question -* `mode`= Retrieval mode used for answer generation +* `session_id`= Session ID used to maintain the history of chats during the user's connection +* `mode` = chat mode to use +* `document_names` = names of the documents to filter on (works for vector mode and vector + graph mode) **Response :** [source,json,indent=0] @@ -383,50 +391,45 @@ The API responsible for a chatbot system designed to leverage multiple AI models { "status": "Success", "data": { - "session_id": "d345d9b6-f8f1-40c0-b585-280c998b9752", - "message": "Google DeepMind is a British-American artificial intelligence research laboratory that focuses on developing advanced AI technologies. 
It is known for creating neural network models capable of playing video games and board games, such as the AlphaGo program, which famously defeated a world champion Go player. DeepMind also works on general-purpose AI systems, like AlphaZero, which can play multiple games at a high level. Additionally, the company has been involved in AI safety work and has established an ethics board to guide its AI development.", + + "session_id": "0cbd04a8-abc3-4776-b393-6a9a2cea36b3", + "message": "response generated by the chat", "info": { "sources": [ - "https://en.wikipedia.org/wiki/Google_DeepMind" + "About Amazon.pdf" ], "model": "gpt-4o-2024-08-06", "nodedetails": { "chunkdetails": [ { - "id": "80b30de81d6c1dca5973587b538887de8f47237d", - "score": 0.8614 + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160", + "score": 1.0 }, { - "id": "419cee50cef9aba9c2204f2a7a17bfe84fb1df20", - "score": 0.8306 - }, - { - "id": "9daf80b62be569ace896d1945c7b25a7fcce9027", - "score": 0.827 - }, - { - "id": "36a1441279bf19feb5498d9a0ecfcebefac1d1f5", - "score": 0.8232 - }, - { - "id": "31a8234c4ab3c5fff8febb4264a5a041e1f21c36", - "score": 0.8181 + "id": "de5486776978353c9f8ac530bcff33eeecbdbbad", + "score": 0.9425 } ], "entitydetails": [], "communitydetails": [] }, - "total_tokens": 1713, - "response_time": 7.22, - "mode": "vector", + "total_tokens": 4575, + "response_time": 17.19, + "mode": "graph_vector_fulltext", "entities": { - "entityids": [], - "relationshipids": [] + "entityids": [ + "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", + "4:98e5e9bb-8095-440d-9462-03985fed2fa2:1877", + ], + "relationshipids": [ + "5:98e5e9bb-8095-440d-9462-03985fed2fa2:8072566611095062357", + "5:98e5e9bb-8095-440d-9462-03985fed2fa2:8072566508015847224" + ] }, "metric_details": { - "question": "what does google deepmind do?", - "contexts": "Document start\nThis Document belongs to the source https://en.wikipedia.org/wiki/Google_DeepMind\nContent: developed by Google DeepMind. 
== References == == Further reading == == External links == Official website\n----\nDeepMind Technologies Limited, also known by its trade name Google DeepMind, is a British-American artificial intelligence research laboratory which serves as a subsidiary of Google. Founded in the UK in 2010, it was acquired by Google in 2014 and merged with Google AIs Google Brain division to become Google DeepMind in April 2023.\nDocument end\n", - "answer": "Google DeepMind is a British-American artificial intelligence research laboratory that focuses on developing advanced AI technologies. It is known for creating neural network models capable of playing video games and board games, such as the AlphaGo program, which famously defeated a world champion Go player. DeepMind also works on general-purpose AI systems, like AlphaZero, which can play multiple games at a high level. Additionally, the company has been involved in AI safety work and has established an ethics board to guide its AI development." + "question": "tell me about amazon ", + "contexts": "context sent to LLM" + "answer": "response generated by the LLM" } }, "user": "chatbot" @@ -447,7 +450,8 @@ This API is used to get the entities and relations associated with a particular * `userName`= Neo4j db username, * `password`= Neo4j db password, * `database`= Neo4j database name -* `chunk_ids` = Chunk ids of document +* `nodedetails` = Node element id's to get information(chunks,entities,communities) +* `entities` = entities received from the retriver for graph based modes **Response :** @@ -458,42 +462,51 @@ This API is used to get the entities and relations associated with a particular "data": { "nodes": [ { - "element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", "labels": [ - "Condition" + "Company" ], "properties": { - "id": "Fibrosis" + "id": "Amazon", + "description": "Initially an online bookstore, Amazon has transformed into a $48 billion retail 
giant, offering products in over forty categories, from books and electronics to groceries. Today, it operates as a logistics platform, a search engine, an Internet advertising platform, an e-commerce platform, and an IT platform." } - }, - + } ], "relationships": [ { - "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1153057844048764467", - "type": "AFFECTS", - "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", - "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73282" - }, - { - "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1155309643862449715", - "type": "AFFECTS", - "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", - "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73294" - }, + "element_id": "5:98e5e9bb-8095-440d-9462-03985fed2fa2:6917952339617775946", + "type": "OFFERS", + "start_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", + "end_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:330" + } ], "chunk_data": [ { - "id": "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", - "position": 1, - "text": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing ...", - "content_offset": 0, - "fileName": "fibrosis", - "length": 1002, + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:14", + "id": "d1e92be81a0872d621242cee9fed69d14b0cd68d", + "position": 13, + "text": " 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. 
 It also applies a differentiation strategy by providing a ....", + "content_offset": 9886, + "fileName": "About Amazon.pdf", + "page_number": 7, + "length": 1024, + "fileSource": "local file", "embedding": null } + ], + "community_data": [ + { + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:1026", + "summary": "Google, led by CEO Sundar Pichai, is actively involved in various business and product initiatives.", + "id": "0-311", + "level": 0, + "weight": 7, + "embedding": null, + "community_rank": 1 + } ] - } + }, + "message": "Total elapsed API time 3.75" } .... @@ -551,6 +564,63 @@ This API is used to view graph for a particular file. } .... +=== Get neighbour nodes +---- +POST /get_neighbours +---- + +This API is used to retrieve the neighbour nodes of the node with the given element id. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `elementId` = Element id of the node to retrieve its neighbours + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "nodes": [ + { + "summary": null, + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:3", + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160", + "position": 2, + "text": null, + "content_offset": 186, + "labels": [ + "Chunk" + ], + "page_number": 2, + "fileName": "About Amazon.pdf", + "length": 904, + "properties": { + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160" + }, + "embedding": null + } + ], + "relationships": [ + { + "element_id": "5:98e5e9bb-8095-440d-9462-03985fed2fa2:1175445000301838339", + "end_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:18", + "start_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:3", + "type": "HAS_ENTITY" + } + ] + }, + "message": "Total elapsed API time 0.24" +} +.... 
+ + + === Clear chat history ---- POST /clear_chat_bot @@ -848,28 +918,36 @@ The API is used to drop and create the vector index when vector index dimesion a } .... -=== Reprocess document +=== Reprocessing of sources ---- POST /retry_processing ---- - -The API is used to reprocess document. - + +This API is used to reprocess cancelled, completed or failed file sources. +Users have 3 options to reprocess files: + +* Start from beginning - With this option the file will be processed again from the beginning, i.e. from the 1st chunk. +* Delete entities and start from beginning - If the file source is already processed and has any existing nodes and relations, those will be deleted and the file will be reprocessed from the 1st chunk. +* Start from last processed position - Cancelled or failed files will be processed from the last successfully processed chunk position. This option is not available for completed files. + +Once the status is set to 'Reprocess', the user can click on Generate graph again to process the file for knowledge graph creation. + **API Parameters :** - -* `uri`=Neo4j uri, -* `userName`= Neo4j db username, -* `password`= Neo4j db password, + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, * `database`= Neo4j database name, -* `file_name`= The name of the file which needs to be reprocessed, -* `retry_condition`= reprocessing condition whether to just start from beginning or delete created entitities and reprocess, - +* `file_name`= Name of the file which the user wants to reprocess. +* `retry_condition` = One of the above 3 options, selected for reprocessing. + + **Response :** [source,json,indent=0] .... { "status": "Success", - "message": "Status set to Reprocess for filename : Alphabet.pdf" + "message": "Status set to Reprocess for filename : $filename" } .... 
From 29ef09b7ec0525cc1cb43aedecc68da21488dbd1 Mon Sep 17 00:00:00 2001 From: karanchellani <142801957+karanchellani@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:14:42 +0530 Subject: [PATCH 211/292] Update backend_docs.adoc --- docs/backend/backend_docs.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 8016fd481..2591ac47e 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -894,9 +894,9 @@ The API is used to merge duplicate entities from database selected by user. "message": "Duplicate entities merged successfully" } .... -=== drop +=== Drop and create vector index ---- -POST /retry_processing +POST /drop_create_vector_index ---- The API is used to drop and create the vector index when vector index dimesion are different. @@ -978,4 +978,4 @@ The API responsible for a evaluating chatbot responses on the basis of different } } } -.... \ No newline at end of file +.... 
From c5cd025bf92adac81d4e7120ca9f93f8ea00c363 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:18:19 +0530 Subject: [PATCH 212/292] 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> --- backend/src/QA_integration.py | 22 ++++++++++++++++++-- docker-compose.yml | 1 + example.env | 1 + frontend/Dockerfile | 2 ++ frontend/example.env | 1 + frontend/src/components/Content.tsx | 30 +++++++++++++--------------- frontend/src/components/Dropdown.tsx | 13 +++++++++++- frontend/src/utils/Constants.ts | 5 +++++ 8 files changed, 56 insertions(+), 19 deletions(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index d56ee5ccb..b7fcbd665 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -390,7 +390,7 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshol try: neo_db = initialize_neo4j_vector(graph, chat_mode_settings) - document_names= list(map(str.strip, json.loads(document_names))) + # document_names= list(map(str.strip, json.loads(document_names))) search_k = chat_mode_settings["top_k"] retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever @@ -656,7 +656,25 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access result = process_graph_response(model, graph, question, messages, history) else: chat_mode_settings = get_chat_mode_settings(mode=mode) - result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) + document_names= list(map(str.strip, json.loads(document_names))) + if document_names and not chat_mode_settings["document_filter"]: + result = { + "session_id": "", + "message": "This chat mode 
does support document selection", + "info": { + "sources": [], + "model": "", + "nodedetails": [], + "total_tokens": 0, + "response_time": 0, + "mode": chat_mode_settings["mode"], + "entities": [], + "metric_details": [], + }, + "user": "chatbot" + } + else: + result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) result["session_id"] = session_id diff --git a/docker-compose.yml b/docker-compose.yml index afc87b848..ea6d2c050 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -62,6 +62,7 @@ services: - VITE_ENV=${VITE_ENV-DEV} - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} + - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} volumes: - ./frontend:/app - /app/node_modules diff --git a/example.env b/example.env index 227c51904..6b542daf1 100644 --- a/example.env +++ b/example.env @@ -31,3 +31,4 @@ VITE_CHUNK_SIZE=5242880 VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" VITE_BATCH_SIZE=2 +VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" diff --git a/frontend/Dockerfile b/frontend/Dockerfile index c3a7c1c82..3053e1ba9 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -12,6 +12,7 @@ ARG VITE_CHUNK_SIZE=5242880 ARG VITE_CHAT_MODES="" ARG VITE_ENV="DEV" ARG VITE_BATCH_SIZE=2 +ARG VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" WORKDIR /app COPY package.json yarn.lock ./ @@ -28,6 +29,7 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \ VITE_CHAT_MODES=$VITE_CHAT_MODES \ VITE_BATCH_SIZE=$VITE_BATCH_SIZE \ + VITE_LLM_MODELS_PROD=$VITE_LLM_MODELS_PROD \ yarn run build # Step 2: Serve the application using Nginx diff --git a/frontend/example.env b/frontend/example.env index 9bc5bc0d5..4063fbc37 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -9,3 +9,4 @@ VITE_LARGE_FILE_SIZE=5242880 
VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" VITE_BATCH_SIZE=2 +VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index c5dceaf36..3f6be20fb 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -521,9 +521,8 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ - userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -533,10 +532,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -568,12 +567,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipCount: isStartFromBegining ? 
0 : f.relationshipsCount, + } : f; }); }); @@ -862,9 +861,8 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 087e0d71c..8c434309f 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -2,7 +2,7 @@ import { Dropdown, Tip } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; import { memo, useMemo, useReducer } from 'react'; import { capitalize, capitalizeWithUnderscore } from '../utils/Utils'; - +// import { LLMDropdownLabel } from '../utils/Constants'; const DropdownComponent: React.FC = ({ options, placeholder, @@ -14,6 +14,8 @@ const DropdownComponent: React.FC = ({ value, }) => { const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false); + const isProdEnv = process.env.VITE_ENV === 'PROD'; + const supportedModels = process.env.VITE_LLM_MODELS_PROD; const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); }; @@ -33,9 +35,11 @@ const DropdownComponent: React.FC = ({ const label = typeof option === 'string' ? capitalizeWithUnderscore(option) : capitalize(option.label); const value = typeof option === 'string' ? option : option.value; + const isModelSupported = !isProdEnv || supportedModels?.includes(value); return { label, value, + isDisabled: !isModelSupported, }; }), placeholder: placeholder || 'Select an option', @@ -60,6 +64,13 @@ const DropdownComponent: React.FC = ({ {children}
        + {/* {isProdEnv && ( + {LLMDropdownLabel.disabledModels} + + {LLMDropdownLabel.devEnv} + + {'.'} + )} */} ); }; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 0043dc241..112f8bbec 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -306,3 +306,8 @@ export const appLabels = { ownSchema: 'Or Define your own Schema', predefinedSchema: 'Select a Pre-defined Schema', }; + +export const LLMDropdownLabel ={ + disabledModels: 'Disabled models are available in the development version. Access more models in our ', + devEnv: 'development environment' +} From 4bed3521cb50478ea4637ed33522d49bc8db5196 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:44:58 +0000 Subject: [PATCH 213/292] Exclude session lable node from duplicate nodes list --- backend/src/graphDB_dataAccess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 189d581a6..58834eb92 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -354,7 +354,7 @@ def get_duplicate_nodes_list(self): score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE')) text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE')) query_duplicate_nodes = """ - MATCH (n:!Chunk&!Document&!`__Community__`) with n + MATCH (n:!Chunk&!Session&!Document&!`__Community__`) with n WHERE n.embedding is not null and n.id is not null // and size(toString(n.id)) > 3 WITH n ORDER BY count {{ (n)--() }} DESC, size(toString(n.id)) DESC // updated WITH collect(n) as nodes From 3dfb42be3f9fdbb1d47b79733aad02c6c6aee751 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:08:56 +0530 Subject: [PATCH 214/292] Added the tooltip for disabled llm option (#835) --- .../src/components/ChatBot/ChatInfoModal.tsx | 
7 +- .../src/components/ChatBot/ChatModeToggle.tsx | 4 +- .../components/ChatBot/ChatModesSwitch.tsx | 4 +- frontend/src/components/ChatBot/ChunkInfo.tsx | 35 ++++--- frontend/src/components/Content.tsx | 30 +++--- .../components/DataSources/AWS/S3Modal.tsx | 4 +- .../components/DataSources/GCS/GCSModal.tsx | 4 +- frontend/src/components/Dropdown.tsx | 94 +++++++++---------- frontend/src/components/FileTable.tsx | 2 +- .../Deduplication/index.tsx | 4 +- .../EntityExtractionSetting.tsx | 10 +- .../SelectedJobList.tsx | 10 +- frontend/src/hooks/useSourceInput.tsx | 4 +- frontend/src/utils/Constants.ts | 13 ++- 14 files changed, 120 insertions(+), 105 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 4ffdd9acd..b3c47b4be 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -261,7 +261,12 @@ const ChatInfoModal: React.FC = ({ To generate this response, the process took {response_time} seconds, utilizing {total_tokens} tokens with the model{' '} {model} in{' '} - {chatModeReadableLables[mode] !== 'vector' ? chatModeReadableLables[mode].replace(/\+/g, ' & ') : chatModeReadableLables[mode]} mode. + + {chatModeReadableLables[mode] !== 'vector' + ? chatModeReadableLables[mode].replace(/\+/g, ' & ') + : chatModeReadableLables[mode]} + {' '} + mode. 
diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 52df01a4a..e26da232c 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -38,8 +38,8 @@ export default function ChatModeToggle({ ) ); } - if (!(chatModes.includes(chatModeLables.vector) || chatModes.includes(chatModeLables['graph+vector']))) { - setchatModes([chatModeLables['graph+vector']]); + if (!chatModes.includes(chatModeLables.vector)) { + setchatModes([chatModeLables.vector]); } } }, [selectedRows.length, chatModes.length]); diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index db91e3372..4ace49af5 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -17,7 +17,9 @@ export default function ChatModesSwitch({ currentMode: string; isFullScreen: boolean; }) { - const chatmodetoshow =chatModeReadableLables[currentMode].includes('+') ? capitalizeWithPlus(chatModeReadableLables[currentMode]) : capitalize(chatModeReadableLables[currentMode]); + const chatmodetoshow = chatModeReadableLables[currentMode].includes('+') + ? capitalizeWithPlus(chatModeReadableLables[currentMode]) + : capitalize(chatModeReadableLables[currentMode]); return ( = ({ loading, chunks, mode }) => { label='Graph view' className={`${loadingGraphView ? 'cursor-wait' : 'cursor-pointer'}`} onClick={() => handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -98,11 +99,12 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -121,10 +123,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -143,10 +146,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -165,10 +169,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -191,10 +196,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    @@ -222,10 +228,11 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
    handleChunkClick(chunk.element_id, 'Chunk')} - >{'View Graph'} + > + {'View Graph'}
    diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 83c2c20b4..28caddc1c 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -522,8 +522,9 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ + userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -533,10 +534,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -568,12 +569,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipCount: isStartFromBegining ? 
0 : f.relationshipsCount, + } : f; }); }); @@ -862,8 +863,9 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index 8e809dfff..c23b94a3b 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -82,7 +82,7 @@ const S3Modal: React.FC = ({ hideModal, open }) => { name: item.fileName, size: item.fileSize, sourceUrl: item.url, - uploadProgress:100, + uploadProgress: 100, // total_pages: 'N/A', id: uuidv4(), ...defaultValues, @@ -99,7 +99,7 @@ const S3Modal: React.FC = ({ hideModal, open }) => { model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, - uploadProgress:100, + uploadProgress: 100, }); } }); diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index 01b405cee..a68faeb97 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -105,7 +105,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => id: uuidv4(), accessToken: codeResponse.access_token, ...defaultValues, - uploadProgress:100 + uploadProgress: 100, }); } else { const tempFileData = copiedFilesData[filedataIndex]; @@ -120,7 +120,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, accessToken: codeResponse.access_token, - uploadProgress:100 + uploadProgress: 100, }); } } diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 8c434309f..663861cfd 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,8 +1,9 @@ -import { Dropdown, Tip } from '@neo4j-ndl/react'; +import { 
Dropdown, Tip, useMediaQuery } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; -import { memo, useMemo, useReducer } from 'react'; +import { memo, useMemo } from 'react'; import { capitalize, capitalizeWithUnderscore } from '../utils/Utils'; -// import { LLMDropdownLabel } from '../utils/Constants'; +import { prodllms } from '../utils/Constants'; + const DropdownComponent: React.FC = ({ options, placeholder, @@ -13,9 +14,8 @@ const DropdownComponent: React.FC = ({ isDisabled, value, }) => { - const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false); const isProdEnv = process.env.VITE_ENV === 'PROD'; - const supportedModels = process.env.VITE_LLM_MODELS_PROD; + const isLargeDesktop = useMediaQuery(`(min-width:1440px )`); const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); }; @@ -23,54 +23,46 @@ const DropdownComponent: React.FC = ({ return ( <>
    - - - { - const label = - typeof option === 'string' ? capitalizeWithUnderscore(option) : capitalize(option.label); - const value = typeof option === 'string' ? option : option.value; - const isModelSupported = !isProdEnv || supportedModels?.includes(value); - return { - label, - value, - isDisabled: !isModelSupported, - }; - }), - placeholder: placeholder || 'Select an option', - defaultValue: defaultValue - ? { label: capitalizeWithUnderscore(defaultValue), value: defaultValue } - : undefined, - menuPlacement: 'auto', - isDisabled: isDisabled, - value: value, - onMenuOpen: () => { - toggleDisableState(); - }, - onMenuClose: () => { - toggleDisableState(); - }, - }} - size='medium' - fluid - /> - - LLM Model used for Extraction & Chat - + LLM Model used for Extraction & Chat
    } + selectProps={{ + onChange: handleChange, + // @ts-ignore + options: allOptions?.map((option) => { + const label = typeof option === 'string' ? capitalizeWithUnderscore(option) : capitalize(option.label); + const value = typeof option === 'string' ? option : option.value; + const isModelSupported = !isProdEnv || prodllms?.includes(value); + return { + label: !isModelSupported ? ( + + + {label} + + Available In Development Version + + ) : ( + {label} + ), + value, + isDisabled: !isModelSupported, + }; + }), + placeholder: placeholder || 'Select an option', + defaultValue: defaultValue + ? { label: capitalizeWithUnderscore(defaultValue), value: defaultValue } + : undefined, + menuPlacement: 'auto', + isDisabled: isDisabled, + value: value, + }} + size='medium' + fluid + /> {children}
    - {/* {isProdEnv && ( - {LLMDropdownLabel.disabledModels} - - {LLMDropdownLabel.devEnv} - - {'.'} - )} */} ); }; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index b3fbafd55..ec367b100 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -522,7 +522,7 @@ const FileTable = forwardRef((props, ref) => { disabled={info.getValue() === 'Uploading'} clean onClick={() => { - const copied={...info.row.original}; + const copied = { ...info.row.original }; delete copied.accessToken; handleCopy(copied); }} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 1e6b0c213..7140bb6b2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -56,8 +56,8 @@ export default function DeduplicationTab() { } if (duplicateNodesData.data.data.length) { setDuplicateNodes(duplicateNodesData.data.data); - //@ts-ignore - setNodesCount(duplicateNodesData.data.message.total) + // @ts-ignore + setNodesCount(duplicateNodesData.data.message.total); } else { setDuplicateNodes([]); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 67dd10f9e..3ab5b48bd 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -317,8 +317,9 @@ export default function EntityExtractionSetting({ options: nodeLabelOptions, onChange: onChangenodes, value: selectedNodes, - classNamePrefix: `${isTablet ? 
'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' - }`, + classNamePrefix: `${ + isTablet ? 'tablet_entity_extraction_Tab_node_label' : 'entity_extraction_Tab_node_label' + }`, }} type='creatable' /> @@ -332,8 +333,9 @@ export default function EntityExtractionSetting({ options: relationshipTypeOptions, onChange: onChangerels, value: selectedRels, - classNamePrefix: `${isTablet ? 'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' - }`, + classNamePrefix: `${ + isTablet ? 'tablet_entity_extraction_Tab_relationship_label' : 'entity_extraction_Tab_relationship_label' + }`, }} type='creatable' /> diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 348c97af2..be608f717 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive - ? postProcessingTasks.includes('enable_communities') - ? postProcessingTasks - : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + (isGdsActive + ? postProcessingTasks.includes('enable_communities') + ? 
postProcessingTasks + : postProcessingTasks.filter((s) => s != 'enable_communities') + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx index ee75a8cb4..7965c03b9 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -118,7 +118,7 @@ export default function useSourceInput( sourceUrl: item.url, id: uuidv4(), language: item.language, - uploadProgress:100, + uploadProgress: 100, // total_pages: 1, ...defaultValues, }; @@ -138,7 +138,7 @@ export default function useSourceInput( model: defaultValues.model, fileSource: defaultValues.fileSource, processingProgress: defaultValues.processingProgress, - uploadProgress:100, + uploadProgress: 100, }); } } diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 112f8bbec..8d9289fd8 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -46,6 +46,11 @@ export const supportedLLmsForRagas = [ 'fireworks_llama_v3_70b', 'bedrock_claude_3_5_sonnet', ]; +export const prodllms = + process.env.VITE_LLM_MODELS_PROD?.trim() != '' + ? (process.env.VITE_LLM_MODELS_PROD?.split(',') as string[]) + : ['openai_gpt_4o', 'openai_gpt_4o_mini', 'diffbot', 'gemini_1.5_flash']; + export const chatModeLables = { vector: 'vector', graph: 'graph', @@ -140,7 +145,7 @@ export const tooltips = { }; export const buttonCaptions = { - exploreGraphWithBloom: 'Explore Graph with Bloom', + exploreGraphWithBloom: 'Explore Graph', showPreviewGraph: 'Preview Graph', deleteFiles: 'Delete Files', generateGraph: 'Generate Graph', @@ -307,7 +312,7 @@ export const appLabels = { predefinedSchema: 'Select a Pre-defined Schema', }; -export const LLMDropdownLabel ={ +export const LLMDropdownLabel = { disabledModels: 'Disabled models are available in the development version. 
Access more models in our ', - devEnv: 'development environment' -} + devEnv: 'development environment', +}; From 4d795bf920ff93c2881cca135c6965addfe75e17 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:42:45 +0000 Subject: [PATCH 215/292] node size changes --- frontend/src/components/Graph/GraphViewModal.tsx | 6 ------ frontend/src/components/Graph/ResultOverview.tsx | 6 ------ 2 files changed, 12 deletions(-) diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 3dc92a745..4f981aa33 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -228,12 +228,6 @@ const GraphViewModal: React.FunctionComponent = ({ return { ...node, selected: match, - size: - match && viewPoint === graphLabels.showGraphView - ? 100 - : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, }; }); // deactivating any active relationships diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx index 93c6b54e3..85143336f 100644 --- a/frontend/src/components/Graph/ResultOverview.tsx +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -92,12 +92,6 @@ const ResultOverview: React.FunctionComponent = ({ return { ...node, selected: isActive, - size: - isActive && viewPoint === graphLabels.showGraphView - ? 100 - : isActive && viewPoint !== graphLabels.showGraphView - ? 
50 - : graphLabels.nodeSize, }; }); // deactivating any active relationships From 1fac375c0d28d060ae73ea903598a3a11a0a7897 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:50:12 +0000 Subject: [PATCH 216/292] mode removal of rows check --- .../src/components/ChatBot/ChatModeToggle.tsx | 53 ++++--------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index e26da232c..d66bda43f 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -9,7 +9,7 @@ import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -20,30 +20,14 @@ export default function ChatModeToggle({ anchorPortal?: boolean; disableBackdrop?: boolean; }) { - const { setchatModes, chatModes, postProcessingTasks, selectedRows } = useFileContext(); + const { setchatModes, chatModes, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); - useEffect(() => { - // If rows are selected, the mode is valid (either vector or graph+vector) - if (selectedRows.length > 0) { - if ( - chatModes.includes(chatModeLables.graph) || - chatModes.includes(chatModeLables.fulltext) || - chatModes.includes(chatModeLables['global search+vector+fulltext']) - ) { - setchatModes((prev) => - prev.filter( - (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables['graph+vector+fulltext']].includes(m) - ) - ); - } - if (!chatModes.includes(chatModeLables.vector)) { - setchatModes([chatModeLables.vector]); - } + if (!chatModes.length) { + 
setchatModes([chatModeLables['graph+vector+fulltext']]); } - }, [selectedRows.length, chatModes.length]); - + }, [chatModes.length]); const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes @@ -51,14 +35,10 @@ export default function ChatModeToggle({ }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { - const isDisabled = Boolean( - selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables['graph+vector']) - ); const handleModeChange = () => { - if (isDisabled) { - setchatModes([chatModeLables['graph+vector']]); - } else if (chatModes.includes(m.mode)) { - setchatModes((prev) => prev.filter((i) => i != m.mode)); + if (chatModes.includes(m.mode)) { + if (chatModes.length === 1) return; + setchatModes((prev) => prev.filter((i) => i !== m.mode)); } else { setchatModes((prev) => [...prev, m.mode]); } @@ -78,7 +58,7 @@ export default function ChatModeToggle({
    ), onClick: handleModeChange, - disabledCondition: isDisabled, + disabledCondition: false, description: ( {chatModes.includes(m.mode) && ( @@ -86,22 +66,11 @@ export default function ChatModeToggle({ {chatModeLables.selected} )} - {isDisabled && ( - <> - {chatModeLables.unavailableChatMode} - - )} ), }; }); - }, [chatModes.length, memoizedChatModes, closeHandler, selectedRows]); - - useEffect(() => { - if (!selectedRows.length && !chatModes.length) { - setchatModes([]); - } - }, [selectedRows.length, chatModes.length]); + }, [chatModes, memoizedChatModes, closeHandler]); return ( ); -} +} \ No newline at end of file From eb14fbebb8ac9a9d281a9a720072490446a3e6ee Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:56:41 +0000 Subject: [PATCH 217/292] formatting --- frontend/src/components/ChatBot/ChatModeToggle.tsx | 8 +++++--- frontend/src/components/Graph/GraphViewModal.tsx | 1 - frontend/src/components/Graph/ResultOverview.tsx | 2 -- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index d66bda43f..0a9a81e10 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -9,7 +9,7 @@ import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => { }, + closeHandler = () => {}, open, anchorPortal = true, disableBackdrop = false, @@ -37,7 +37,9 @@ export default function ChatModeToggle({ return memoizedChatModes?.map((m) => { const handleModeChange = () => { if (chatModes.includes(m.mode)) { - if (chatModes.length === 1) return; + if (chatModes.length === 1) { + return; + } setchatModes((prev) => prev.filter((i) => i !== m.mode)); } else { setchatModes((prev) => [...prev, m.mode]); @@ -81,4 +83,4 @@ export default function 
ChatModeToggle({ items={menuItems} /> ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 4f981aa33..e50f215d1 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -433,7 +433,6 @@ const GraphViewModal: React.FunctionComponent = ({ newScheme={newScheme} searchQuery={searchQuery} setSearchQuery={setSearchQuery} - viewPoint={viewPoint} setNodes={setNodes} setRelationships={setRelationships} /> diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx index 85143336f..142c9fabc 100644 --- a/frontend/src/components/Graph/ResultOverview.tsx +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -14,7 +14,6 @@ interface OverViewProps { newScheme: Scheme; searchQuery: string; setSearchQuery: Dispatch>; - viewPoint: string; setNodes: Dispatch>; setRelationships: Dispatch>; } @@ -24,7 +23,6 @@ const ResultOverview: React.FunctionComponent = ({ newScheme, searchQuery, setSearchQuery, - viewPoint, setNodes, setRelationships, }) => { From 5cd9724b42874cd35adc1191ff21fb58ec46b285 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:26:58 +0000 Subject: [PATCH 218/292] Exclude __Entity__ node label from duplicate node list --- backend/src/graphDB_dataAccess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 58834eb92..82391d1e4 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -354,7 +354,7 @@ def get_duplicate_nodes_list(self): score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE')) text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE')) query_duplicate_nodes = """ - MATCH (n:!Chunk&!Session&!Document&!`__Community__`) with n + MATCH 
(n:!Chunk&!Session&!Document&!`__Community__`&!`__Entity__`) with n WHERE n.embedding is not null and n.id is not null // and size(toString(n.id)) > 3 WITH n ORDER BY count {{ (n)--() }} DESC, size(toString(n.id)) DESC // updated WITH collect(n) as nodes From 70cb0042fa94e08ab6d88fd54cfbbd6b0fc71aef Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:51:27 +0530 Subject: [PATCH 219/292] Update README.md --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8d05f10fa..79008efdf 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,11 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b ### Local deployment #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. +Accoroding to enviornment we are configuring the models which is indicated by VITE_LLM_MODELS_PROD variable we can configure model based on our need. +EX: +```env +VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" +``` In your root folder, create a .env file with your OPENAI and DIFFBOT keys (if you want to use both): ```env @@ -72,7 +77,7 @@ You can of course combine all (local, youtube, wikipedia, s3 and gcs) or remove ### Chat Modes -By default,all of the chat modes will be available: vector, graph+vector and graph. +By default,all of the chat modes will be available: vector, graph_vector, graph, fulltext, graph_vector_fulltext , entity_vector and global_vector. 
If none of the mode is mentioned in the chat modes variable all modes will be available: ```env VITE_CHAT_MODES="" @@ -80,7 +85,7 @@ VITE_CHAT_MODES="" If however you want to specify the only vector mode or only graph mode you can do that by specifying the mode in the env: ```env -VITE_CHAT_MODES="vector,graph+vector" +VITE_CHAT_MODES="vector,graph" ``` #### Running Backend and Frontend separately (dev environment) @@ -150,12 +155,14 @@ Allow unauthenticated request : Yes | VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | +| VITE_LLM_MODELS_PROD | Optional | openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash | To Distinguish models based on the Enviornment PROD or DEV | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | | RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | + ## For local llms (Ollama) 1. 
Pull the docker imgage of ollama ```bash From bf51e7883e251b67514ff75853bd82ad30a62a4a Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:09:02 +0530 Subject: [PATCH 220/292] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 79008efdf..64e8c818e 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,13 @@ DIFFBOT_API_KEY="your-diffbot-key" if you only want OpenAI: ```env -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" +VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" OPENAI_API_KEY="your-openai-key" ``` if you only want Diffbot: ```env -VITE_LLM_MODELS="diffbot" +VITE_LLM_MODELS_PROD="diffbot" DIFFBOT_API_KEY="your-diffbot-key" ``` From 76b325ca8d570da022c8e0b5f3106aead52586da Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:09:32 +0530 Subject: [PATCH 221/292] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 64e8c818e..fadf6eee9 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,6 @@ Allow unauthenticated request : Yes | VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | | VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | | VITE_REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available | -| VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A | VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A | VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app | | 
VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | From 1d607bc6f841459af3ac0f4b6e6093e6cc513cbd Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:59:24 +0000 Subject: [PATCH 222/292] fixed the youtube link --- frontend/src/components/ChatBot/SourcesInfo.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index a934913d9..ddfe92a6e 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -106,7 +106,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { )} {!link?.startsWith('s3://') && - !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) && ( + !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
    From d8af5a501bfd0203979a40027a8a53b9094e0763 Mon Sep 17 00:00:00 2001 From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:28:55 +0530 Subject: [PATCH 223/292] Security header and GZIPMiddleware (#847) * Added security header all API * Add GZipMiddleware --- backend/score.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/score.py b/backend/score.py index f7b8c4082..af8400dd5 100644 --- a/backend/score.py +++ b/backend/score.py @@ -33,7 +33,7 @@ from Secweb.ContentSecurityPolicy import ContentSecurityPolicy from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame - +from fastapi.middleware.gzip import GZipMiddleware from src.ragas_eval import * logger = CustomLogger() @@ -52,10 +52,10 @@ def sick(): app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -# app.add_middleware(HSTS, Option={'max-age': 4}) -# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) -# app.add_middleware(XContentTypeOptions) -# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) +app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +app.add_middleware(XContentTypeOptions) +app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) +app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) app.add_middleware( CORSMiddleware, From 358d5a6102303b81518965ad6d96053945846515 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Fri, 8 Nov 2024 11:57:13 +0530 Subject: [PATCH 224/292] Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text * integrated the service layer 
for chunkdata * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api , community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- backend/score.py | 54 +++++++++++++- backend/src/communities.py | 1 - backend/src/graph_query.py | 33 ++++++++- backend/src/shared/constants.py | 13 ++++ frontend/src/components/Content.tsx | 62 ++++++++++++++-- frontend/src/components/FileTable.tsx | 22 +++++- .../components/Popups/ChunkPopUp/index.tsx | 72 +++++++++++++++++++ frontend/src/services/getChunkText.ts | 19 +++++ frontend/src/types.ts | 17 ++++- 9 files changed, 281 insertions(+), 12 deletions(-) create mode 100644 frontend/src/components/Popups/ChunkPopUp/index.tsx create mode 100644 frontend/src/services/getChunkText.ts diff --git a/backend/score.py b/backend/score.py index af8400dd5..93a8c55fa 100644 --- a/backend/score.py +++ b/backend/score.py @@ -12,7 +12,7 @@ from langchain_google_vertexai import ChatVertexAI from src.api_response import create_api_response from src.graphDB_dataAccess import graphDBdataAccess -from src.graph_query import get_graph_results +from src.graph_query import get_graph_results,get_chunktext_results from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse @@ -818,5 +818,57 @@ async def calculate_metric(question: str = Form(), finally: gc.collect() +@app.post("/fetch_chunktext") +async def fetch_chunktext( + uri: str = Form(), + database: str = Form(), + userName: str = 
Form(), + password: str = Form(), + document_name: str = Form(), + page_no: int = Form(1) +): + try: + payload_json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'userName': userName, + 'database': database, + 'document_name': document_name, + 'page_no': page_no, + 'logging_time': formatted_time(datetime.now(timezone.utc)) + } + logger.log_struct(payload_json_obj, "INFO") + start = time.time() + result = await asyncio.to_thread( + get_chunktext_results, + uri=uri, + username=userName, + password=password, + database=database, + document_name=document_name, + page_no=page_no + ) + end = time.time() + elapsed_time = end - start + json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'document_name': document_name, + 'page_no': page_no, + 'logging_time': formatted_time(datetime.now(timezone.utc)), + 'elapsed_api_time': f'{elapsed_time:.2f}' + } + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=result, message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + job_status = "Failed" + message = "Unable to get chunk text response" + error_message = str(e) + logging.exception(f'Exception in fetch_chunktext: {error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + + if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/communities.py b/backend/src/communities.py index d1130150c..ac0813d8d 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -487,4 +487,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO - diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index fb7333b48..86739ba6c 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -3,7 +3,7 @@ from neo4j import GraphDatabase import os import json -from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY +from src.shared.constants import 
GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY # from neo4j.debug import watch # watch("neo4j") @@ -226,3 +226,34 @@ def get_graph_results(uri, username, password,database,document_names): driver.close() +def get_chunktext_results(uri, username, password, database, document_name, page_no): + """Retrieves chunk text, position, and page number from graph data with pagination.""" + try: + logging.info("Starting chunk text query process") + offset = 10 + skip = (page_no - 1) * offset + limit = offset + driver = GraphDatabase.driver(uri, auth=(username, password)) + with driver.session(database=database) as session: + total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name) + total_chunks = total_chunks_result.single()["total_chunks"] + total_pages = (total_chunks + offset - 1) // offset # Calculate total pages + records = session.run(CHUNK_TEXT_QUERY, file_name=document_name, skip=skip, limit=limit) + pageitems = [ + { + "text": record["chunk_text"], + "position": record["chunk_position"], + "pagenumber": record["page_number"] + } + for record in records + ] + logging.info(f"Query process completed with {len(pageitems)} chunks retrieved") + return { + "pageitems": pageitems, + "total_pages": total_pages + } + except Exception as e: + logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}") + raise Exception("An error occurred in get_chunktext_results. 
Please check the logs for more details.") from e + finally: + driver.close() \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b58fd3a67..83cb69245 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -161,6 +161,19 @@ ] AS entities """ +COUNT_CHUNKS_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN count(c) AS total_chunks +""" + +CHUNK_TEXT_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN c.text AS chunk_text, c.position AS chunk_position, c.page_number AS page_number +ORDER BY c.position +SKIP $skip +LIMIT $limit +""" + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 28caddc1c..cab5a2673 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -11,6 +11,7 @@ import { CustomFile, OptionType, UserCredentials, + chunkdata, connectionState, } from '../types'; import deleteAPI from '../services/DeleteFiles'; @@ -44,6 +45,8 @@ import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; import { useMessageContext } from '../context/UserMessages'; import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast'; +import { getChunkText } from '../services/getChunkText'; +import ChunkPopUp from './Popups/ChunkPopUp'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -70,6 +73,7 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); + const [documentName, setDocumentName] = useState(''); const { setUserCredentials, 
userCredentials, @@ -85,6 +89,12 @@ const Content: React.FC = ({ const [retryFile, setRetryFile] = useState(''); const [retryLoading, setRetryLoading] = useState(false); const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false); + const [showChunkPopup, toggleChunkPopup] = useReducer((state) => !state, false); + const [chunksLoading, toggleChunksLoading] = useReducer((state) => !state, false); + const [currentPage, setCurrentPage] = useState(0); + const [totalPageCount, setTotalPageCount] = useState(null); + const [textChunks, setTextChunks] = useState([]); + const [alertStateForRetry, setAlertStateForRetry] = useState({ showAlert: false, alertType: 'neutral', @@ -122,7 +132,12 @@ const Content: React.FC = ({ } ); const childRef = useRef(null); - + const incrementPage = () => { + setCurrentPage((prev) => prev + 1); + }; + const decrementPage = () => { + setCurrentPage((prev) => prev - 1); + }; useEffect(() => { if (!init && !searchParams.has('connectURL')) { let session = localStorage.getItem('neo4j.connection'); @@ -149,7 +164,13 @@ const Content: React.FC = ({ setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } }, []); - + useEffect(() => { + if (currentPage >= 1) { + (async () => { + await getChunks(documentName, currentPage); + })(); + } + }, [currentPage, documentName]); useEffect(() => { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { @@ -251,7 +272,15 @@ const Content: React.FC = ({ setModel(selectedOption?.value); } }; - + const getChunks = async (name: string, pageNo: number) => { + toggleChunksLoading(); + const response = await getChunkText(userCredentials as UserCredentials, name, pageNo); + setTextChunks(response.data.data.pageitems); + if (!totalPageCount) { + setTotalPageCount(response.data.data.total_pages); + } + toggleChunksLoading(); + }; const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => { if (!isselectedRows) { const fileItem = filesData.find((f) 
=> f.id == uid); @@ -497,7 +526,7 @@ const Content: React.FC = ({ } }; - function processWaitingFilesOnRefresh() { + const processWaitingFilesOnRefresh = () => { let data = []; const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length; @@ -517,7 +546,7 @@ const Content: React.FC = ({ .filter((f) => f.status === 'New' || f.status == 'Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } - } + }; const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; @@ -771,6 +800,18 @@ const Content: React.FC = ({ view='contentView' > )} + {showChunkPopup && ( + toggleChunkPopup()} + showChunkPopup={showChunkPopup} + chunks={textChunks} + incrementPage={incrementPage} + decrementPage={decrementPage} + currentPage={currentPage} + totalPageCount={totalPageCount} + > + )} {showEnhancementDialog && ( = ({ setRetryFile(id); toggleRetryPopup(); }} + onChunkView={async (name) => { + setDocumentName(name); + if (name != documentName) { + toggleChunkPopup(); + if (totalPageCount) { + setTotalPageCount(null); + } + setCurrentPage(1); + // await getChunks(name, 1); + } + }} ref={childRef} handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index ec367b100..8cc8d48ed 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -38,8 +38,9 @@ import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, import { useCredentials } from '../context/UserCredentials'; import { ArrowPathIconSolid, - ClipboardDocumentIconOutline, + ClipboardDocumentIconSolid, MagnifyingGlassCircleIconSolid, + DocumentTextIconSolid, } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; @@ -56,7 +57,7 @@ import { ThemeWrapperContext } from '../context/ThemeWrapper'; let onlyfortheFirstRender = true; const FileTable = 
forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials, isReadOnlyUser } = useCredentials(); @@ -527,10 +528,25 @@ const FileTable = forwardRef((props, ref) => { handleCopy(copied); }} > - + + + { + onChunkView(info?.row?.original?.name as string); + }} + clean + placement='left' + label='chunktextaction' + text='View Chunks' + size='large' + disabled={info.getValue() === 'Uploading'} + > + ), + size: 300, + minSize: 180, header: () => Actions, footer: (info) => info.column.id, }), diff --git a/frontend/src/components/Popups/ChunkPopUp/index.tsx b/frontend/src/components/Popups/ChunkPopUp/index.tsx new file mode 100644 index 000000000..7966ddd6e --- /dev/null +++ b/frontend/src/components/Popups/ChunkPopUp/index.tsx @@ -0,0 +1,72 @@ +import { Dialog, Typography, Flex, IconButton } from '@neo4j-ndl/react'; +import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons'; +import { chunkdata } from '../../../types'; +import Loader from '../../../utils/Loader'; +import { useMemo } from 'react'; + +const ChunkPopUp = ({ + showChunkPopup, + chunks, + onClose, + chunksLoading, + incrementPage, + decrementPage, + currentPage, + totalPageCount, +}: { + showChunkPopup: boolean; + chunks: chunkdata[]; + onClose: () => void; + chunksLoading: boolean; + incrementPage: () => void; + decrementPage: () => void; + currentPage: number | null; + totalPageCount: number | null; +}) => { + const sortedChunksData = useMemo(() => { + return chunks.sort((a, b) => a.position - b.position); + }, [chunks]); + return ( + + Text Chunks + + {chunksLoading ? ( + + ) : ( +
      + {sortedChunksData.map((c, idx) => ( +
    1. + + + Position : + {c.position} + + {c.pagenumber ? ( + + Page No :{' '} + {c.pagenumber} + + ) : null} + {c.text} + +
    2. + ))} +
    + )} +
    + {totalPageCount != null && totalPageCount > 1 && ( + + + + + + + + + + + )} +
    + ); +}; +export default ChunkPopUp; diff --git a/frontend/src/services/getChunkText.ts b/frontend/src/services/getChunkText.ts new file mode 100644 index 000000000..f9825dc34 --- /dev/null +++ b/frontend/src/services/getChunkText.ts @@ -0,0 +1,19 @@ +import { UserCredentials, chunksData } from '../types'; +import api from '../API/Index'; + +export const getChunkText = async (userCredentials: UserCredentials, documentName: string, page_no: number) => { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + formData.append('document_name', documentName); + formData.append('page_no', page_no.toString()); + try { + const response = await api.post(`/fetch_chunktext`, formData); + return response; + } catch (error) { + console.log(error); + throw error; + } +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 02dbc0a2b..6bc34d2da 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -154,6 +154,7 @@ export interface FileTableProps { connectionStatus: boolean; setConnectionStatus: Dispatch>; onInspect: (id: string) => void; + onChunkView: (name: string) => void; handleGenerateGraph: () => void; onRetry: (id: string) => void; } @@ -379,7 +380,13 @@ export interface commonserverresponse { error?: string; message?: string | orphanTotalNodes; file_name?: string; - data?: labelsAndTypes | labelsAndTypes[] | uploadData | orphanNodeProps[] | dupNodes[]; + data?: + | labelsAndTypes + | labelsAndTypes[] + | uploadData + | orphanNodeProps[] + | dupNodes[] + | { pageitems: chunkdata[]; total_pages: number }; } export interface dupNodeProps { id: string; @@ -397,6 +404,11 @@ export interface selectedDuplicateNodes { firstElementId: string; similarElementIds: string[]; } +export interface chunkdata { + text: string; + position: 
number; + pagenumber: null | number; +} export interface ScehmaFromText extends Partial { data: labelsAndTypes; } @@ -407,6 +419,9 @@ export interface ServerData extends Partial { export interface duplicateNodesData extends Partial { data: dupNodes[]; } +export interface chunksData extends Partial { + data: { pageitems: chunkdata[]; total_pages: number }; +} export interface OrphanNodeResponse extends Partial { data: orphanNodeProps[]; } From 6d35a34664e52f0d37392ef685466af30b11e844 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 8 Nov 2024 14:33:16 +0530 Subject: [PATCH 225/292] Communities Id to Title (#851) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- 
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps 
--------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * 
wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files 
due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token to the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the tooltip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrieval bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrollbar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-auth… * Update FileTable.tsx * DEV to STAGING (#833) * updated script"integration test cases" * decreased the delay for pollintg API * Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes * node-highlight * Build fixex * slash docker change * deactivating previous node/relationshsips * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size * changed chat mode names (#702) * env changes * used axios instance for network calls * disabled the toolip when dropdown is open state * format fixes + chat mode naming changes * mode added to info model for entities * Issue fixed, List out of index while getting status of dicuement node * processing count updated on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the 
readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) 
* Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * 
added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for 
gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * create new endpoint populate_graph_schema and update the query for getting labels from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condition * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- "soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload ----… * Dev to STAGING (#836) * updated script"integration test cases" * decreased the delay for pollintg API * Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes * node-highlight * Build fixex * slash docker change * deactivating previous node/relationshsips * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size * changed chat mode names (#702) * env changes * used axios instance for network calls * disabled the toolip when dropdown is open state * format fixes + chat mode naming changes * mode added to info model for entities * Issue fixed, List out of index while getting status of dicuement node * processing count updated on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector 
index api * chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous 
modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more 
files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local 
deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the 
delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before 
extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon 
accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed 
knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * 
tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for 
unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload ----… * Dev (#837) * updated script"integration test cases" * decreased the delay for polling API * Graph enhancements (#696) * relationship Changes * addition of relationship labels * onclick to nodes * node-highlight * Build fixes * slash docker change * deactivating previous node/relationships * lint fixes * class issue * search * search on basis of id / captions * debounce changes * class changes (#693) * legends highlight * search query reset * node size * changed chat mode names (#702) * env changes * used axios instance for network calls * disabled the tooltip when dropdown is open state * format fixes + chat mode naming changes * mode added to info model for entities * Issue fixed, List out of index while getting status of document node * processing count updated on cancel * format fixes * remove whitespace for environment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * 
chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme 
changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file 
selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README 
doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added 
the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before 
extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon 
accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed 
knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * 
tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for 
unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co… * merge changes * Update README.md * Update README.md * removal of extra LLm env * Community name * title generation * removed logging statement added for debugging * env changes --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Jayanth T Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna --- backend/src/communities.py | 48 ++++++--- backend/src/neighbours.py | 3 +- backend/src/shared/constants.py | 2 +- backend/test_integrationqa.py | 21 ++++ docker-compose.yml | 2 +- 
example.env | 1 - frontend/Dockerfile | 1 - frontend/src/components/ChatBot/ChunkInfo.tsx | 13 +-- .../components/ChatBot/CommunitiesInfo.tsx | 8 +- .../src/components/ChatBot/EntitiesInfo.tsx | 11 +-- frontend/src/components/ChatBot/chatInfo.ts | 15 +-- frontend/src/components/Content.tsx | 5 +- frontend/src/components/FileTable.tsx | 2 +- .../src/components/Graph/GraphViewModal.tsx | 36 +++---- frontend/src/components/Layout/PageLayout.tsx | 1 - .../ConnectionModal/ConnectionModal.tsx | 2 +- .../Deduplication/index.tsx | 5 +- .../DeleteTabForOrphanNodes/index.tsx | 5 +- frontend/src/components/QuickStarter.tsx | 19 ++-- frontend/src/types.ts | 9 ++ frontend/src/utils/Constants.ts | 99 ++++++++++--------- frontend/src/utils/Utils.ts | 3 + 22 files changed, 173 insertions(+), 138 deletions(-) diff --git a/backend/src/communities.py b/backend/src/communities.py index ac0813d8d..a38b39696 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -107,24 +107,38 @@ STORE_COMMUNITY_SUMMARIES = """ UNWIND $data AS row MERGE (c:__Community__ {id:row.community}) -SET c.summary = row.summary +SET c.summary = row.summary, + c.title = row.title """ + COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No pre-amble." -COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, -generate a natural language summary of the provided information: -{community_info} -Summary:""" +COMMUNITY_TEMPLATE = """ +Based on the provided nodes and relationships that belong to the same graph community, +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information +{community_info} +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. 
+""" PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information" PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community, -generate a natural language summary of the information.Include all the necessary information as possible +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information. Include all the necessary information as much as possible. + {community_info} -Summary:""" +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. +""" GET_COMMUNITY_DETAILS = """ @@ -277,8 +291,17 @@ def process_community_info(community, chain, is_parent=False): combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", []))) else: combined_text = prepare_string(community) - summary = chain.invoke({'community_info': combined_text}) - return {"community": community['communityId'], "summary": summary} + summary_response = chain.invoke({'community_info': combined_text}) + lines = summary_response.splitlines() + title = "Untitled Community" + summary = "" + for line in lines: + if line.lower().startswith("title"): + title = line.split(":", 1)[-1].strip() + elif line.lower().startswith("summary"): + summary = line.split(":", 1)[-1].strip() + logging.info(f"Community Title : {title}") + return {"community": community['communityId'], "title":title, "summary": summary} except Exception as e: logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") return None @@ -291,7 +314,7 @@ def create_community_summaries(gds, model): summaries = [] with ThreadPoolExecutor() as executor: futures = [executor.submit(process_community_info, community, community_chain) for community in community_info_list.to_dict(orient="records")] - + for future 
in as_completed(futures): result = future.result() if result: @@ -482,8 +505,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO logging.warning("Failed to write communities. Constraint was not applied.") except Exception as e: logging.error(f"Failed to create communities: {e}") - - - - - diff --git a/backend/src/neighbours.py b/backend/src/neighbours.py index 08022ecc6..431d5b4bd 100644 --- a/backend/src/neighbours.py +++ b/backend/src/neighbours.py @@ -20,7 +20,8 @@ labels: [coalesce(apoc.coll.removeAll(labels(node), ['__Entity__'])[0], "*")], element_id: elementId(node), properties: { - id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END + id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END, + title: CASE WHEN node.title IS NOT NULL THEN node.title ELSE " " END } } ] AS nodes, diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 83cb69245..084b5d1ba 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -730,4 +730,4 @@ value "2023-03-15"." "## 5. Strict Compliance\n" "Adhere to the rules strictly. Non-compliance will result in termination." 
- """ \ No newline at end of file + """ diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 548e9706f..ede8077f7 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -124,6 +124,27 @@ def test_graph_website(model_name): print("Fail: ", e) return weburl_result +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result + + def test_graph_from_youtube_video(model_name): """Test graph creation from a YouTube video.""" source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' diff --git a/docker-compose.yml b/docker-compose.yml index ea6d2c050..8a0fdc4b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,7 +53,6 @@ services: args: - VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000} - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,wiki,s3} - - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-} - VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50} @@ -62,6 +61,7 @@ services: - VITE_ENV=${VITE_ENV-DEV} - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} + - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - 
VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} volumes: - ./frontend:/app diff --git a/example.env b/example.env index 6b542daf1..5d3a598c9 100644 --- a/example.env +++ b/example.env @@ -24,7 +24,6 @@ ENTITY_EMBEDDING=True VITE_BACKEND_API_URL="http://localhost:8000" VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3" VITE_ENV="DEV" VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 3053e1ba9..311294f4a 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -20,7 +20,6 @@ RUN yarn install COPY . ./ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_REACT_APP_SOURCES=$VITE_REACT_APP_SOURCES \ - VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \ VITE_BLOOM_URL=$VITE_BLOOM_URL \ VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \ diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 8568e0b5d..aa16d9451 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -21,18 +21,16 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [loadingGraphView, setLoadingGraphView] = useState(false); const handleChunkClick = (elementId: string, viewMode: string) => { + setOpenGraphView(true); + setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, - setOpenGraphView, - setViewPoint, - setLoadingGraphView ); }; @@ -72,7 +70,6 @@ const ChunkInfo: 
FC = ({ loading, chunks, mode }) => {
    handleChunkClick(chunk.element_id, 'Chunk')} > {'View Graph'} @@ -100,7 +97,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
    handleChunkClick(chunk.element_id, 'Chunk')} > @@ -123,7 +119,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} > @@ -146,7 +141,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} > @@ -169,7 +163,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} > @@ -196,7 +189,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
    handleChunkClick(chunk.element_id, 'Chunk')} > @@ -228,7 +220,6 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
    handleChunkClick(chunk.element_id, 'Chunk')} > diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index bc8e5e8d3..005eb7ac6 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -13,18 +13,17 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [loadingGraphView, setLoadingGraphView] = useState(false); + const [graphLoading, setGraphLoading] = useState(false); const handleCommunityClick = (elementId: string, viewMode: string) => { + setOpenGraphView(true); + setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, - setOpenGraphView, - setViewPoint, - setLoadingGraphView ); }; @@ -42,7 +41,6 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) =
    handleCommunityClick(community.element_id, 'chatInfoView')} >{`ID : ${community.id}`} diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 80e4fdafa..c1982a2c8 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -14,7 +14,6 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [loadingGraphView, setLoadingGraphView] = useState(false); const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { const items = infoEntities.reduce((acc, entity) => { @@ -44,15 +43,14 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in }, [labelCounts]); const handleEntityClick = (elementId: string, viewMode: string) => { + setOpenGraphView(true); + setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, - setOpenGraphView, - setViewPoint, - setLoadingGraphView ); }; @@ -69,9 +67,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ? graphonly_entities.map((label, index) => (
    • {Object.keys(label).map((key) => ( @@ -118,7 +114,6 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in handleEntityClick(textId!, 'chatInfoView')} - className={loadingGraphView ? 'cursor-wait' : 'cursor-pointer'} > {text} diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index c7e990ae7..ba0183c2b 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -1,5 +1,6 @@ import { getNeighbors } from '../../services/GraphQuery'; import { NeoNode, NeoRelationship, UserCredentials } from '../../types'; +import { graphLabels } from '../../utils/Constants'; export const handleGraphNodeClick = async ( userCredentials: UserCredentials, @@ -7,18 +8,12 @@ export const handleGraphNodeClick = async ( viewMode: string, setNeoNodes: React.Dispatch>, setNeoRels: React.Dispatch>, - setOpenGraphView: React.Dispatch>, - setViewPoint: React.Dispatch>, - setLoadingGraphView?: React.Dispatch> ) => { - if (setLoadingGraphView) { - setLoadingGraphView(true); - } try { const result = await getNeighbors(userCredentials, elementId); if (result && result.data.data.nodes.length > 0) { let { nodes } = result.data.data; - if (viewMode === 'Chunk') { + if (viewMode === graphLabels.chatInfoView) { nodes = nodes.filter((node: NeoNode) => node.labels.length === 1 && node.properties.id !== null); } const nodeIds = new Set(nodes.map((node: NeoNode) => node.element_id)); @@ -27,14 +22,8 @@ export const handleGraphNodeClick = async ( ); setNeoNodes(nodes); setNeoRels(relationships); - setOpenGraphView(true); - setViewPoint('chatInfoView'); } } catch (error: any) { console.error('Error fetching neighbors:', error); - } finally { - if (setLoadingGraphView) { - setLoadingGraphView(false); - } } }; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index cab5a2673..594af8628 100644 --- a/frontend/src/components/Content.tsx +++ 
b/frontend/src/components/Content.tsx @@ -117,10 +117,11 @@ const Content: React.FC = ({ setProcessedCount, setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); + const [graphLoading, setGraphLoading] = useState(false); const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { @@ -992,4 +993,4 @@ const Content: React.FC = ({ ); }; -export default Content; +export default Content; \ No newline at end of file diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 8cc8d48ed..42b82572a 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -962,4 +962,4 @@ const FileTable = forwardRef((props, ref) => { ); }); -export default FileTable; +export default FileTable; \ No newline at end of file diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index e50f215d1..28574d30a 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -31,6 +31,7 @@ import CheckboxSelection from './CheckboxSelection'; import ResultOverview from './ResultOverview'; import { ResizePanelDetails } from './ResizePanel'; import GraphPropertiesPanel from './GraphPropertiesPanel'; +import { useGraphContext } from '../../context/GraphLoading'; const GraphViewModal: React.FunctionComponent = ({ open, @@ -46,7 +47,6 @@ const GraphViewModal: React.FunctionComponent = ({ const [relationships, setRelationships] = useState([]); const [allNodes, setAllNodes] = useState([]); const [allRelationships, 
setAllRelationships] = useState([]); - const [loading, setLoading] = useState(false); const [status, setStatus] = useState<'unknown' | 'success' | 'danger'>('unknown'); const [statusMessage, setStatusMessage] = useState(''); const { userCredentials } = useCredentials(); @@ -57,15 +57,16 @@ const GraphViewModal: React.FunctionComponent = ({ const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); + const { loadingGraph, setLoadingGraph } = useGraphContext(); const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -107,10 +108,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? 
'']); return nodeRelationshipData; } catch (error: any) { @@ -138,19 +139,19 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(finalNodes); setRelationships(finalRels); setNewScheme(schemeVal); - setLoading(false); + setLoadingGraph(false); } setAllNodes(finalNodes); setAllRelationships(finalRels); setScheme(schemeVal); setDisableRefresh(false); } else { - setLoading(false); + setLoadingGraph(false); setStatus('danger'); setStatusMessage(`No Nodes and Relations for the ${inspectedName} file`); } } catch (error: any) { - setLoading(false); + setLoadingGraph(false); setStatus('danger'); setStatusMessage(error.message); } @@ -158,7 +159,7 @@ const GraphViewModal: React.FunctionComponent = ({ useEffect(() => { if (open) { - setLoading(true); + setLoadingGraph(true); setGraphType([]); if (viewPoint !== graphLabels.chatInfoView) { graphApi(); @@ -170,10 +171,10 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(finalNodes); setRelationships(finalRels); setNewScheme(schemeVal); - setLoading(false); + setLoadingGraph(false); } } - }, [open]); + }, [open, viewPoint]); useEffect(() => { if (debouncedQuery) { @@ -333,6 +334,7 @@ const GraphViewModal: React.FunctionComponent = ({ onDrag: true, }; + console.log('Graph viewModal', loadingGraph); return ( <> = ({ {checkBoxView && ( @@ -361,7 +363,7 @@ const GraphViewModal: React.FunctionComponent = ({
      - {loading ? ( + {loadingGraph ? (
      diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 48e94c023..bf3082fc8 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -31,7 +31,6 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index ad7e7b48e..76df732a4 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -460,4 +460,4 @@ export default function ConnectionModal({
      ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 7140bb6b2..b30ba5302 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -30,6 +30,7 @@ import mergeDuplicateNodes from '../../../../services/MergeDuplicateEntities'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; +import { useGraphContext } from '../../../../context/GraphLoading'; export default function DeduplicationTab() { const { breakpoints } = tokens; @@ -46,6 +47,7 @@ export default function DeduplicationTab() { const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); const [nodesCount, setNodesCount] = useState(0); + const { setLoadingGraph } = useGraphContext(); const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -116,7 +118,8 @@ export default function DeduplicationTab() { setNeoNodes, setNeoRels, setOpenGraphView, - setViewPoint + setViewPoint, + setLoadingGraph ); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index bcc2597f1..2d147c9e2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -21,6 +21,7 @@ import DeletePopUp from '../../DeletePopUp/DeletePopUp'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; +import 
{ useGraphContext } from '../../../../context/GraphLoading'; export default function DeletePopUpForOrphanNodes({ deleteHandler, loading, @@ -41,6 +42,7 @@ export default function DeletePopUpForOrphanNodes({ const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); + const { setLoadingGraph } = useGraphContext(); const fetchOrphanNodes = useCallback(async () => { try { @@ -80,7 +82,8 @@ export default function DeletePopUpForOrphanNodes({ setNeoNodes, setNeoRels, setOpenGraphView, - setViewPoint + setViewPoint, + setLoadingGraph, ); }; diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 1a4e169d2..43de7bd07 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -5,6 +5,7 @@ import { FileContextProvider } from '../context/UsersFiles'; import UserCredentialsWrapper from '../context/UserCredentials'; import AlertContextWrapper from '../context/Alert'; import { MessageContextWrapper } from '../context/UserMessages'; +import { GraphContextWrapper } from '../context/GraphLoading'; const QuickStarter: React.FunctionComponent = () => { const [showSettingsModal, setshowSettingsModal] = useState(false); @@ -19,14 +20,16 @@ const QuickStarter: React.FunctionComponent = () => { - -
      - - + + +
      + + + diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 6bc34d2da..6ebf26b45 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -574,6 +574,10 @@ export interface MessagesContextProviderProps { children: ReactNode; } +export interface GraphContextProviderProps { + children: ReactNode; +} + export interface Chunk { id: string; position: number; @@ -730,6 +734,11 @@ export interface MessageContextType { setClearHistoryData: Dispatch>; } +export interface GraphContextType { + loadingGraph: boolean; + setLoadingGraph: Dispatch>; +} + export interface DatabaseStatusProps { isConnected: boolean; isGdsActive: boolean; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8d9289fd8..06d03d28b 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,26 +12,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai_gpt_3.5', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai_gpt_4o') ? 'openai_gpt_4o' : llms?.includes('gemini_1.5_pro') - ? 'gemini_1.5_pro' - : 'diffbot'; + ? 'gemini_1.5_pro' + : 'diffbot'; export const supportedLLmsForRagas = [ 'openai_gpt_3.5', 'openai_gpt_4', @@ -76,40 +76,40 @@ export const chatModeReadableLables: Record = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? 
process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables['graph+vector'], - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables['graph+vector+fulltext'], - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables['entity search+vector'], - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables['global search+vector+fulltext'], - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables['graph+vector'], + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables['graph+vector+fulltext'], + description: 'Integrates vector, graph, and full-text indexing for 
comprehensive search results.', + }, + { + mode: chatModeLables['entity search+vector'], + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables['global search+vector+fulltext'], + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -291,6 +291,7 @@ export const graphLabels = { docChunk: 'Document & Chunk', community: 'Communities', noNodesRels: 'No Nodes and No relationships', + neighborView: 'neighborView' }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 6945c17ca..a65fe2362 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -130,6 +130,9 @@ export const getNodeCaption = (node: any) => { if (node.properties.fileName) { return node.properties.fileName; } + if(node.labels[0] === '__Community__'){ + return node.properties.title; + } return node.properties.id; }; From cd6b4c207b6f8f669529785ed2c40a00f5903250 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 8 Nov 2024 19:51:09 +0530 Subject: [PATCH 226/292] disconnected nodes (#852) --- frontend/src/components/Graph/GraphViewModal.tsx | 5 ++--- frontend/src/utils/Utils.ts | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 28574d30a..e65a2c318 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -124,9 +124,7 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if 
(result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes - .map((f: Node) => f) - .filter((node: ExtendedNode) => node.labels.length === 1); + const neoNodes = result.data.data.nodes; const nodeIds = new Set(neoNodes.map((node: any) => node.element_id)); const neoRels = result.data.data.relationships .map((f: Relationship) => f) @@ -157,6 +155,7 @@ const GraphViewModal: React.FunctionComponent = ({ } }; + console.log('nodes', nodes); useEffect(() => { if (open) { setLoadingGraph(true); diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index a65fe2362..f407f21e0 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -159,11 +159,11 @@ export function extractPdfFileName(url: string): string { export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRelationship[]) => { const schemeVal: Scheme = {}; let iterator = 0; - const labels: string[] = neoNodes.map((f: any) => f.labels); + const labels: string[] = neoNodes.flatMap((f: any) => f.labels); for (let index = 0; index < labels.length; index++) { const label = labels[index]; if (schemeVal[label] == undefined) { - schemeVal[label] = calcWordColor(label[0]); + schemeVal[label] = calcWordColor(label); iterator += 1; } } From 399785f1a761146db678197047b776dc7bc0366d Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:47:32 +0000 Subject: [PATCH 227/292] loading changes --- frontend/src/components/ChatBot/ChunkInfo.tsx | 15 +++++-- .../components/ChatBot/CommunitiesInfo.tsx | 10 +++-- .../src/components/ChatBot/EntitiesInfo.tsx | 13 +++++-- frontend/src/components/ChatBot/chatInfo.ts | 17 ++++++-- .../src/components/Graph/GraphViewModal.tsx | 39 +++++++++---------- .../Deduplication/index.tsx | 7 +--- .../DeleteTabForOrphanNodes/index.tsx | 7 +--- frontend/src/components/QuickStarter.tsx | 3 -- 8 files changed, 63 insertions(+), 48 deletions(-) diff 
--git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index aa16d9451..b58ee2c4d 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -21,16 +21,18 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); + const [loadingGraphView, setLoadingGraphView] = useState(false); const handleChunkClick = (elementId: string, viewMode: string) => { - setOpenGraphView(true); - setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView ); }; @@ -70,6 +72,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} > {'View Graph'} @@ -97,6 +100,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -119,6 +123,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -141,6 +146,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -163,6 +169,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -189,6 +196,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { Similarity Score: {chunk?.score}
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -220,6 +228,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} > @@ -252,4 +261,4 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ); }; -export default ChunkInfo; +export default ChunkInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 005eb7ac6..3c6899c5b 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -13,17 +13,18 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const [graphLoading, setGraphLoading] = useState(false); + const [loadingGraphView, setLoadingGraphView] = useState(false); const handleCommunityClick = (elementId: string, viewMode: string) => { - setOpenGraphView(true); - setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView ); }; @@ -41,6 +42,7 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) =
      handleCommunityClick(community.element_id, 'chatInfoView')} >{`ID : ${community.id}`} @@ -73,4 +75,4 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ); }; -export default CommunitiesInfo; +export default CommunitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index c1982a2c8..22eca8a57 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -14,6 +14,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); + const [loadingGraphView, setLoadingGraphView] = useState(false); const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { const items = infoEntities.reduce((acc, entity) => { @@ -43,14 +44,15 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in }, [labelCounts]); const handleEntityClick = (elementId: string, viewMode: string) => { - setOpenGraphView(true); - setViewPoint('chatInfoView'); handleGraphNodeClick( userCredentials as UserCredentials, elementId, viewMode, setNeoNodes, setNeoRels, + setOpenGraphView, + setViewPoint, + setLoadingGraphView ); }; @@ -67,7 +69,9 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ? graphonly_entities.map((label, index) => (
      • {Object.keys(label).map((key) => ( @@ -114,6 +118,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in handleEntityClick(textId!, 'chatInfoView')} + className={loadingGraphView ? 'cursor-wait' : 'cursor-pointer'} > {text} @@ -141,4 +146,4 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ); }; -export default EntitiesInfo; +export default EntitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index ba0183c2b..1a229dc70 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -1,6 +1,5 @@ import { getNeighbors } from '../../services/GraphQuery'; import { NeoNode, NeoRelationship, UserCredentials } from '../../types'; -import { graphLabels } from '../../utils/Constants'; export const handleGraphNodeClick = async ( userCredentials: UserCredentials, @@ -8,12 +7,18 @@ export const handleGraphNodeClick = async ( viewMode: string, setNeoNodes: React.Dispatch>, setNeoRels: React.Dispatch>, + setOpenGraphView: React.Dispatch>, + setViewPoint: React.Dispatch>, + setLoadingGraphView?: React.Dispatch> ) => { + if (setLoadingGraphView) { + setLoadingGraphView(true); + } try { const result = await getNeighbors(userCredentials, elementId); if (result && result.data.data.nodes.length > 0) { let { nodes } = result.data.data; - if (viewMode === graphLabels.chatInfoView) { + if (viewMode === 'Chunk') { nodes = nodes.filter((node: NeoNode) => node.labels.length === 1 && node.properties.id !== null); } const nodeIds = new Set(nodes.map((node: NeoNode) => node.element_id)); @@ -22,8 +27,14 @@ export const handleGraphNodeClick = async ( ); setNeoNodes(nodes); setNeoRels(relationships); + setOpenGraphView(true); + setViewPoint('chatInfoView'); } } catch (error: any) { console.error('Error fetching neighbors:', error); + } finally { + if (setLoadingGraphView) { + setLoadingGraphView(false); + } } -}; +}; 
\ No newline at end of file diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index e65a2c318..ac35a93ae 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -31,7 +31,6 @@ import CheckboxSelection from './CheckboxSelection'; import ResultOverview from './ResultOverview'; import { ResizePanelDetails } from './ResizePanel'; import GraphPropertiesPanel from './GraphPropertiesPanel'; -import { useGraphContext } from '../../context/GraphLoading'; const GraphViewModal: React.FunctionComponent = ({ open, @@ -47,6 +46,7 @@ const GraphViewModal: React.FunctionComponent = ({ const [relationships, setRelationships] = useState([]); const [allNodes, setAllNodes] = useState([]); const [allRelationships, setAllRelationships] = useState([]); + const [loading, setLoading] = useState(false); const [status, setStatus] = useState<'unknown' | 'success' | 'danger'>('unknown'); const [statusMessage, setStatusMessage] = useState(''); const { userCredentials } = useCredentials(); @@ -57,16 +57,15 @@ const GraphViewModal: React.FunctionComponent = ({ const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); - const { loadingGraph, setLoadingGraph } = useGraphContext(); const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -108,10 +107,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? 
await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -137,28 +136,27 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(finalNodes); setRelationships(finalRels); setNewScheme(schemeVal); - setLoadingGraph(false); + setLoading(false); } setAllNodes(finalNodes); setAllRelationships(finalRels); setScheme(schemeVal); setDisableRefresh(false); } else { - setLoadingGraph(false); + setLoading(false); setStatus('danger'); setStatusMessage(`No Nodes and Relations for the ${inspectedName} file`); } } catch (error: any) { - setLoadingGraph(false); + setLoading(false); setStatus('danger'); setStatusMessage(error.message); } }; - console.log('nodes', nodes); useEffect(() => { if (open) { - setLoadingGraph(true); + setLoading(true); setGraphType([]); if (viewPoint !== graphLabels.chatInfoView) { graphApi(); @@ -170,10 +168,10 @@ const GraphViewModal: React.FunctionComponent = ({ setNodes(finalNodes); setRelationships(finalRels); setNewScheme(schemeVal); - setLoadingGraph(false); + setLoading(false); } } - }, [open, viewPoint]); + }, [open]); useEffect(() => { if (debouncedQuery) { @@ -333,7 +331,6 @@ const GraphViewModal: React.FunctionComponent = ({ onDrag: true, }; - console.log('Graph viewModal', loadingGraph); return ( <> = ({ {checkBoxView && ( @@ -362,7 +359,7 @@ const GraphViewModal: React.FunctionComponent = ({
        - {loadingGraph ? ( + {loading ? (
        @@ -448,4 +445,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; +export default GraphViewModal; \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index b30ba5302..330f5f657 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -30,7 +30,6 @@ import mergeDuplicateNodes from '../../../../services/MergeDuplicateEntities'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; -import { useGraphContext } from '../../../../context/GraphLoading'; export default function DeduplicationTab() { const { breakpoints } = tokens; @@ -47,7 +46,6 @@ export default function DeduplicationTab() { const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); const [nodesCount, setNodesCount] = useState(0); - const { setLoadingGraph } = useGraphContext(); const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -118,8 +116,7 @@ export default function DeduplicationTab() { setNeoNodes, setNeoRels, setOpenGraphView, - setViewPoint, - setLoadingGraph + setViewPoint ); }; @@ -355,4 +352,4 @@ export default function DeduplicationTab() { )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 2d147c9e2..6d4daae10 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -21,7 
+21,6 @@ import DeletePopUp from '../../DeletePopUp/DeletePopUp'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; -import { useGraphContext } from '../../../../context/GraphLoading'; export default function DeletePopUpForOrphanNodes({ deleteHandler, loading, @@ -42,7 +41,6 @@ export default function DeletePopUpForOrphanNodes({ const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); - const { setLoadingGraph } = useGraphContext(); const fetchOrphanNodes = useCallback(async () => { try { @@ -82,8 +80,7 @@ export default function DeletePopUpForOrphanNodes({ setNeoNodes, setNeoRels, setOpenGraphView, - setViewPoint, - setLoadingGraph, + setViewPoint ); }; @@ -318,4 +315,4 @@ export default function DeletePopUpForOrphanNodes({ )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 43de7bd07..4dc4b4a0a 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -5,7 +5,6 @@ import { FileContextProvider } from '../context/UsersFiles'; import UserCredentialsWrapper from '../context/UserCredentials'; import AlertContextWrapper from '../context/Alert'; import { MessageContextWrapper } from '../context/UserMessages'; -import { GraphContextWrapper } from '../context/GraphLoading'; const QuickStarter: React.FunctionComponent = () => { const [showSettingsModal, setshowSettingsModal] = useState(false); @@ -20,7 +19,6 @@ const QuickStarter: React.FunctionComponent = () => { -
        { closeSettingModal={closeSettingModal} /> - From 686ed95dc87c69fe89022563aa3306fb2bfa6c4a Mon Sep 17 00:00:00 2001 From: karanchellani <142801957+karanchellani@users.noreply.github.com> Date: Mon, 11 Nov 2024 13:25:13 +0530 Subject: [PATCH 228/292] Update score.py --- backend/score.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/score.py b/backend/score.py index 93a8c55fa..bbe471e48 100644 --- a/backend/score.py +++ b/backend/score.py @@ -33,7 +33,7 @@ from Secweb.ContentSecurityPolicy import ContentSecurityPolicy from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame -from fastapi.middleware.gzip import GZipMiddleware +#from fastapi.middleware.gzip import GZipMiddleware from src.ragas_eval import * logger = CustomLogger() @@ -55,7 +55,7 @@ def sick(): app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) -app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) +#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) app.add_middleware( CORSMiddleware, From 4f1af1820fc30b25f86c45d17154caac2bc416ca Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:24:45 +0000 Subject: [PATCH 229/292] added middleware --- backend/score.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/backend/score.py b/backend/score.py index bbe471e48..03fb75de9 100644 --- a/backend/score.py +++ b/backend/score.py @@ -19,7 +19,7 @@ from src.communities import create_communities from src.neighbours import get_neighbour_nodes import json -from typing import List, Mapping +from typing import List, Mapping, Union from 
starlette.middleware.sessions import SessionMiddleware import google_auth_oauthlib.flow from google.oauth2.credentials import Credentials @@ -33,8 +33,10 @@ from Secweb.ContentSecurityPolicy import ContentSecurityPolicy from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame -#from fastapi.middleware.gzip import GZipMiddleware +from fastapi.middleware.gzip import GZipMiddleware from src.ragas_eval import * +from starlette.types import ASGIApp, Message, Receive, Scope, Send +import gzip logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -49,14 +51,42 @@ def healthy(): def sick(): return False - +class CustomGZipMiddleware: + def __init__( + self, + app: ASGIApp, + paths: List[str], + minimum_size: int = 1000, + compresslevel: int = 5 + ): + self.app = app + self.paths = paths + self.minimum_size = minimum_size + self.compresslevel = compresslevel + + async def __call__(self, scope: Scope, receive: Receive, send: Send): + if scope["type"] != "http": + return await self.app(scope, receive, send) + + path = scope["path"] + should_compress = any(path.startswith(gzip_path) for gzip_path in self.paths) + + if not should_compress: + return await self.app(scope, receive, send) + + gzip_middleware = GZipMiddleware( + app=self.app, + minimum_size=self.minimum_size, + compresslevel=self.compresslevel + ) + await gzip_middleware(scope, receive, send) app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) #app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) - +app.add_middleware(CustomGZipMiddleware, minimum_size=1000, 
compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) app.add_middleware( CORSMiddleware, allow_origins=["*"], From 1c29940f35d1a4029371e3340983e308ce0e8850 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:32:27 +0000 Subject: [PATCH 230/292] removed the unused state --- frontend/src/components/Content.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 594af8628..8e8516666 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -121,7 +121,7 @@ const Content: React.FC = ({ const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); - const [graphLoading, setGraphLoading] = useState(false); + const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { From 1d14749dd36d9ac92823d93580dc1d9bd4da5062 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:15:35 +0530 Subject: [PATCH 231/292] Youtube timestamp (#877) * youtube timestamp added to metadata * updated timestamps format while extraction * added fix for last chunk * updated default values of timestamp --------- Co-authored-by: kaustubh-darekar --- backend/src/chunkid_entities.py | 4 ++-- backend/src/document_sources/youtube.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 31ae07496..7d23e23dd 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -74,8 +74,8 @@ def 
process_chunk_data(chunk_data): for chunk in record["chunks"]: chunk.update(doc_properties) if chunk["fileSource"] == "youtube": - chunk["start_time"] = min(time_to_seconds(chunk["start_time"]),time_to_seconds(chunk["end_time"])) - chunk["end_time"] = time_to_seconds(chunk["end_time"]) + chunk["start_time"] = min(time_to_seconds(chunk.get('start_time',0)),time_to_seconds(chunk.get("end_time",0))) + chunk["end_time"] = time_to_seconds(chunk.get("end_time",0)) chunk_properties.append(chunk) return chunk_properties diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index e30de301e..dacda09f0 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -42,7 +42,7 @@ def get_youtube_combined_transcript(youtube_id): transcript_dict = get_youtube_transcript(youtube_id) transcript='' for td in transcript_dict: - transcript += ''.join(td['text']) + transcript += ''.join(td['text'])+" " return transcript except Exception as e: message = f"Youtube transcript is not available for youtube Id: {youtube_id}" @@ -83,9 +83,20 @@ def get_documents_from_youtube(url): # print(f'youtube page_content: {youtube_transcript[0].page_content}') # print(f'youtube id: {youtube_transcript[0].metadata["id"]}') # print(f'youtube title: {youtube_transcript[0].metadata["snippet"]["title"]}') - transcript= get_youtube_combined_transcript(match.group(1)) + transcript= get_youtube_transcript(match.group(1)) + transcript_content='' + counter = YOUTUBE_CHUNK_SIZE_SECONDS + pages = [] + for i, td in enumerate(transcript): + if td['start'] < counter: + transcript_content += ''.join(td['text'])+" " + else : + transcript_content += ''.join(td['text'])+" " + pages.append(Document(page_content=transcript_content.strip(), metadata={'start_timestamp':str(timedelta(seconds = counter-YOUTUBE_CHUNK_SIZE_SECONDS)).split('.')[0], 'end_timestamp':str(timedelta(seconds = td['start'])).split('.')[0]})) + counter += 
YOUTUBE_CHUNK_SIZE_SECONDS + transcript_content='' + pages.append(Document(page_content=transcript_content.strip(), metadata={'start_timestamp':str(timedelta(seconds = counter-YOUTUBE_CHUNK_SIZE_SECONDS)).split('.')[0], 'end_timestamp':str(timedelta(seconds =transcript[-1]['start'] if transcript else counter)).split('.')[0]})) # Handle empty transcript_pieces file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - pages = [Document(page_content=transcript)] return file_name, pages except Exception as e: error_message = str(e) From d6f4ac6a06699ed354d09b25642ff4176b9db6c1 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Thu, 14 Nov 2024 14:51:37 +0530 Subject: [PATCH 232/292] Handled Nonetype error during global search. (#876) --- backend/src/QA_integration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index b7fcbd665..12d974c3b 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -278,7 +278,9 @@ def retrieve_documents(doc_retriever, messages): except Exception as e: error_message = f"Error retrieving documents: {str(e)}" logging.error(error_message) - raise RuntimeError(error_message) + docs = None + transformed_question = None + return docs,transformed_question From a5a989de48de4e05f20017102008814840162f17 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:15:15 +0530 Subject: [PATCH 233/292] Additional metrics using ground truth (#855) * Updating ragas metrics * added the service for additional metrics * additional metrics api * Adding Rouge to requirement * changes done for additional metrics for gemini model * Additional metrics changes related to gemini model * Adding Rouge_Score Version * Api Integration * payload changes * payload fix * Fixing Eval Error * Adding fact_score metric * code refactoring * table integration * data binding * 
Integrated additional metrics on multimodes * removed fact score * Removing Fact Score * fix: Multimode fix * custommiddleware for gzip * removed unused state * message changes * uncommented gzipmiddleware * code refactoring * removed settings modal code * Table UI Fixes * removed state * UX improvements for chunks popup * added the status check * ndl version changes * tip and dropdown changes * icon fixes * contextmenu fix * Box CSS fix * icon fixes * icon changes * IsRoot fix * added the tooltip for metrics * Menu fix inside modal * hover color fix * menu changes * format and lint fixes --------- Co-authored-by: a-s-poorna Co-authored-by: kaustubh-darekar --- backend/requirements.txt | 4 +- backend/score.py | 35 +- backend/src/QA_integration.py | 2 +- backend/src/ragas_eval.py | 48 + frontend/package.json | 9 +- frontend/src/App.css | 11 +- frontend/src/HOC/CustomModal.tsx | 7 +- frontend/src/HOC/withVisibility.tsx | 14 + frontend/src/assets/images/chunks.svg | 221 ++ .../src/components/ChatBot/ChatInfoModal.tsx | 319 ++- .../src/components/ChatBot/ChatModeToggle.tsx | 38 +- .../components/ChatBot/ChatModesSwitch.tsx | 16 +- frontend/src/components/ChatBot/Chatbot.tsx | 71 +- frontend/src/components/ChatBot/ChunkInfo.tsx | 47 +- .../components/ChatBot/CommunitiesInfo.tsx | 13 +- .../src/components/ChatBot/EntitiesInfo.tsx | 16 +- .../ChatBot/ExpandedChatButtonContainer.tsx | 42 +- .../components/ChatBot/MetricsCheckbox.tsx | 18 + .../src/components/ChatBot/MetricsTab.tsx | 26 +- .../components/ChatBot/MultiModeMetrics.tsx | 115 +- .../src/components/ChatBot/SourcesInfo.tsx | 12 +- frontend/src/components/ChatBot/chatInfo.ts | 2 +- frontend/src/components/Content.tsx | 89 +- .../components/DataSources/AWS/S3Modal.tsx | 56 +- .../components/DataSources/GCS/GCSModal.tsx | 64 +- .../components/DataSources/Local/DropZone.tsx | 10 +- .../Local/DropZoneForSmallLayouts.tsx | 23 +- frontend/src/components/Dropdown.tsx | 20 +- frontend/src/components/FileTable.tsx | 123 +- 
.../components/Graph/CheckboxSelection.tsx | 12 +- .../components/Graph/GraphPropertiesTable.tsx | 4 +- .../src/components/Graph/GraphViewModal.tsx | 32 +- .../src/components/Graph/ResultOverview.tsx | 22 +- .../src/components/Layout/DrawerChatbot.tsx | 2 +- .../src/components/Layout/DrawerDropzone.tsx | 8 +- frontend/src/components/Layout/Header.tsx | 2 +- frontend/src/components/Layout/PageLayout.tsx | 49 +- frontend/src/components/Layout/SideNav.tsx | 158 +- .../components/Popups/ChunkPopUp/index.tsx | 41 +- .../ConnectionModal/ConnectionModal.tsx | 98 +- .../VectorIndexMisMatchAlert.tsx | 4 +- .../Popups/DeletePopUp/DeletePopUp.tsx | 8 +- .../Deduplication/index.tsx | 45 +- .../DeleteTabForOrphanNodes/index.tsx | 41 +- .../EntityExtractionSetting.tsx | 38 +- .../SelectedJobList.tsx | 6 +- .../PostProcessingCheckList/index.tsx | 6 +- .../Popups/GraphEnhancementDialog/index.tsx | 61 +- .../LargeFilePopUp/ConfirmationDialog.tsx | 6 +- .../Popups/LargeFilePopUp/LargeFilesAlert.tsx | 22 +- .../Popups/RetryConfirmation/Index.tsx | 20 +- .../Popups/Settings/SchemaFromText.tsx | 31 +- .../Popups/Settings/SettingModal.tsx | 285 -- frontend/src/components/QuickStarter.tsx | 22 +- .../src/components/UI/ButtonWithToolTip.tsx | 30 +- frontend/src/components/UI/CustomCheckBox.tsx | 2 +- frontend/src/components/UI/CustomMenu.tsx | 30 + frontend/src/components/UI/ErrroBoundary.tsx | 4 +- frontend/src/components/UI/FallBackDialog.tsx | 2 +- .../src/components/UI/IconButtonToolTip.tsx | 37 +- frontend/src/components/UI/Menu.tsx | 50 - frontend/src/components/UI/TipWrapper.tsx | 14 +- .../WebSources/CustomSourceInput.tsx | 39 +- .../WebSources/GenericSourceModal.tsx | 36 +- frontend/src/context/UsersFiles.tsx | 4 +- frontend/src/services/AdditionalMetrics.ts | 31 + frontend/src/types.ts | 20 +- frontend/src/utils/Constants.ts | 134 +- frontend/src/utils/Utils.ts | 2 +- frontend/yarn.lock | 2520 +++++++++-------- 70 files changed, 2964 insertions(+), 2485 deletions(-) create mode 
100644 frontend/src/HOC/withVisibility.tsx create mode 100644 frontend/src/assets/images/chunks.svg create mode 100644 frontend/src/components/ChatBot/MetricsCheckbox.tsx delete mode 100644 frontend/src/components/Popups/Settings/SettingModal.tsx create mode 100644 frontend/src/components/UI/CustomMenu.tsx delete mode 100644 frontend/src/components/UI/Menu.tsx create mode 100644 frontend/src/services/AdditionalMetrics.ts diff --git a/backend/requirements.txt b/backend/requirements.txt index 8fc0e0bda..5f0f5dc62 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -179,5 +179,5 @@ PyMuPDF==1.24.5 pypandoc==1.13 graphdatascience==1.10 Secweb==1.11.0 -ragas==0.1.14 - +ragas==0.2.2 +rouge_score==0.1.2 diff --git a/backend/score.py b/backend/score.py index 03fb75de9..d1ec0e68e 100644 --- a/backend/score.py +++ b/backend/score.py @@ -85,7 +85,6 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) -#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) app.add_middleware( CORSMiddleware, @@ -847,6 +846,40 @@ async def calculate_metric(question: str = Form(), ) finally: gc.collect() + + +@app.post('/additional_metrics') +async def calculate_additional_metrics(question: str = Form(), + context: str = Form(), + answer: str = Form(), + reference: str = Form(), + model: str = Form(), + mode: str = Form(), +): + try: + context_list = 
[str(item).strip() for item in json.loads(context)] if context else [] + answer_list = [str(item).strip() for item in json.loads(answer)] if answer else [] + mode_list = [str(item).strip() for item in json.loads(mode)] if mode else [] + result = await get_additional_metrics(question, context_list,answer_list, reference, model) + if result is None or "error" in result: + return create_api_response( + 'Failed', + message='Failed to calculate evaluation metrics.', + error=result.get("error", "Ragas evaluation returned null") + ) + data = {mode: {metric: result[i][metric] for metric in result[i]} for i, mode in enumerate(mode_list)} + return create_api_response('Success', data=data) + except Exception as e: + logging.exception(f"Error while calculating evaluation metrics: {e}") + return create_api_response( + 'Failed', + message="Error while calculating evaluation metrics", + error=str(e) + ) + finally: + gc.collect() + + @app.post("/fetch_chunktext") async def fetch_chunktext( diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 12d974c3b..fff7d7923 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -662,7 +662,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access if document_names and not chat_mode_settings["document_filter"]: result = { "session_id": "", - "message": "This chat mode does support document selection", + "message": "Please deselect all documents in the table before using this chat mode", "info": { "sources": [], "model": "", diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index 8052cb9a2..687db37aa 100644 --- a/backend/src/ragas_eval.py +++ b/backend/src/ragas_eval.py @@ -7,6 +7,16 @@ from ragas import evaluate from ragas.metrics import answer_relevancy, faithfulness from src.shared.common_fn import load_embedding_model +from ragas.dataset_schema import SingleTurnSample +from ragas.metrics import BleuScore, RougeScore, SemanticSimilarity, 
ContextEntityRecall +from ragas.metrics._factual_correctness import FactualCorrectness +from ragas.llms import LangchainLLMWrapper +from langchain_openai import ChatOpenAI +from langchain.embeddings import OpenAIEmbeddings +from ragas.embeddings import LangchainEmbeddingsWrapper +import nltk + +nltk.download('punkt') load_dotenv() EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL") @@ -52,3 +62,41 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str): except Exception as e: logging.exception(f"Error during metrics evaluation: {e}") return {"error": str(e)} + + +async def get_additional_metrics(question: str, contexts: list, answers: list, reference: str, model_name: str): + """Calculates multiple metrics for given question, answers, contexts, and reference.""" + try: + if ("diffbot" in model_name) or ("ollama" in model_name): + raise ValueError(f"Unsupported model for evaluation: {model_name}") + llm, model_name = get_llm(model=model_name) + ragas_llm = LangchainLLMWrapper(llm) + embeddings = EMBEDDING_FUNCTION + embedding_model = LangchainEmbeddingsWrapper(embeddings=embeddings) + rouge_scorer = RougeScore() + semantic_scorer = SemanticSimilarity() + entity_recall_scorer = ContextEntityRecall() + entity_recall_scorer.llm = ragas_llm + semantic_scorer.embeddings = embedding_model + metrics = [] + for response, context in zip(answers, contexts): + sample = SingleTurnSample(response=response, reference=reference) + rouge_score = await rouge_scorer.single_turn_ascore(sample) + rouge_score = round(rouge_score,4) + semantic_score = await semantic_scorer.single_turn_ascore(sample) + semantic_score = round(semantic_score, 4) + if "gemini" in model_name: + entity_recall_score = "Not Available" + else: + entity_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context]) + entity_recall_score = await entity_recall_scorer.single_turn_ascore(entity_sample) + entity_recall_score = round(entity_recall_score, 4) + metrics.append({ + 
"rouge_score": rouge_score, + "semantic_score": semantic_score, + "context_entity_recall_score": entity_recall_score + }) + return metrics + except Exception as e: + logging.exception("Error in get_additional_metrics") + return {"error": str(e)} \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json index 9e51f89fa..81d6f24b4 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -15,11 +15,12 @@ "@mui/material": "^5.15.10", "@mui/styled-engine": "^5.15.9", "@neo4j-devtools/word-color": "^0.0.8", - "@neo4j-ndl/base": "^2.12.7", - "@neo4j-ndl/react": "^2.16.9", - "@neo4j-nvl/base": "^0.3.3", - "@neo4j-nvl/react": "^0.3.3", + "@neo4j-ndl/base": "^3.0.10", + "@neo4j-ndl/react": "^3.0.17", + "@neo4j-nvl/base": "^0.3.6", + "@neo4j-nvl/react": "^0.3.6", "@react-oauth/google": "^0.12.1", + "@tanstack/react-table": "^8.20.5", "@types/uuid": "^9.0.7", "axios": "^1.6.5", "clsx": "^2.1.1", diff --git a/frontend/src/App.css b/frontend/src/App.css index e912a05e2..da6c34af6 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -25,7 +25,7 @@ } .contentWithExpansion { - width: calc(-840px + 100dvw); + width: calc(-807px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; @@ -386,4 +386,13 @@ .custom-menu { min-width: 250px; max-width: 305px; +} +.ndl-modal-root{ + z-index: 39 !important; +} +.tbody-dark .ndl-data-grid-tr:hover { + --cell-background: rgb(60 63 68) !important; +} +.tbody-light .ndl-data-grid-tr:hover { + --cell-background: rgb(226 227 229) !important; } \ No newline at end of file diff --git a/frontend/src/HOC/CustomModal.tsx b/frontend/src/HOC/CustomModal.tsx index e756a092d..814a4343c 100644 --- a/frontend/src/HOC/CustomModal.tsx +++ b/frontend/src/HOC/CustomModal.tsx @@ -16,7 +16,7 @@ const CustomModal: React.FC = ({ return ( = ({ {status !== 'unknown' && ( setStatus('unknown')} type={status} name='Custom Banner' + usage='inline' /> )}
        {children}
        - diff --git a/frontend/src/HOC/withVisibility.tsx b/frontend/src/HOC/withVisibility.tsx new file mode 100644 index 000000000..057c38bd0 --- /dev/null +++ b/frontend/src/HOC/withVisibility.tsx @@ -0,0 +1,14 @@ +interface VisibilityProps { + isVisible: boolean; +} +export function withVisibility

        (WrappedComponent: React.ComponentType

        ) { + const VisibityControlled = (props: P & VisibilityProps) => { + if (props.isVisible === false) { + return null; + } + + return ; + }; + + return VisibityControlled; +} diff --git a/frontend/src/assets/images/chunks.svg b/frontend/src/assets/images/chunks.svg new file mode 100644 index 000000000..e1aa08f32 --- /dev/null +++ b/frontend/src/assets/images/chunks.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index b3c47b4be..5aee9067a 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -3,34 +3,43 @@ import { Typography, Flex, Tabs, - CypherCodeBlock, - CypherCodeBlockProps, + Code, useCopyToClipboard, Banner, useMediaQuery, Button, + TextArea, + IconButton, + Accordion, } from '@neo4j-ndl/react'; import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; import { ExtendedNode, UserCredentials, chatInfoMessage } from '../../types'; -import { useContext, useEffect, useMemo, useState } from 'react'; +import { useEffect, useMemo, useReducer, useRef, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import { ThemeWrapperContext } from 
'../../context/ThemeWrapper'; import { tokens } from '@neo4j-ndl/base'; import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './CommunitiesInfo'; -import { chatModeLables, chatModeReadableLables, supportedLLmsForRagas } from '../../utils/Constants'; +import { + chatModeLables, + chatModeReadableLables, + mergeNestedObjects, + supportedLLmsForRagas, +} from '../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; import { Stack } from '@mui/material'; import { capitalizeWithUnderscore, getNodes } from '../../utils/Utils'; import MultiModeMetrics from './MultiModeMetrics'; +import getAdditionalMetrics from '../../services/AdditionalMetrics'; +import { withVisibility } from '../../HOC/withVisibility'; +import MetricsCheckbox from './MetricsCheckbox'; const ChatInfoModal: React.FC = ({ sources, @@ -80,24 +89,40 @@ const ChatInfoModal: React.FC = ({ : 3 ); const { userCredentials } = useCredentials(); - const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)); const [multiModeError, setMultiModeError] = useState(''); + const [enableReference, toggleReferenceVisibility] = useReducer((state: boolean) => !state, false); + const textAreaRef = useRef(null); + const [expandedItemIdValue, setExpandedId] = useState(null); + const [isAdditionalMetricsEnabled, setIsAdditionalMetricsEnabled] = useState( + multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 3 + ? true + : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 3 + ? 
false + : null + ); + const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState( + metricDetails != undefined && Object.keys(metricDetails).length > 2 + ? true + : metricDetails != undefined && Object.keys(metricDetails).length <= 2 + ? false + : null + ); - const actions: CypherCodeBlockProps['actions'] = useMemo( + const actions: React.ComponentProps>[] = useMemo( () => [ { title: 'copy', - 'aria-label': 'copy', + ariaLabel: 'copy', children: ( <> {copiedText ? ( ) : ( - + )} ), @@ -183,79 +208,126 @@ const ChatInfoModal: React.FC = ({ setActiveTab(tabId); }; const loadMetrics = async () => { - if (activeChatmodes) { - if (Object.keys(activeChatmodes).length <= 1) { - setShowMetricsTable(true); - const [defaultMode] = Object.keys(activeChatmodes); - try { - toggleMetricsLoading(); - const response = await getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [ - defaultMode, - ]); - toggleMetricsLoading(); - if (response.data.status === 'Success') { - const data = response; - saveMetrics(data.data.data[defaultMode]); - } else { - throw new Error(response.data.error); - } - } catch (error) { - if (error instanceof Error) { - toggleMetricsLoading(); - console.log('Error in getting chat metrics', error); - saveMetrics({ faithfulness: 0, answer_relevancy: 0, error: error.message }); + // @ts-ignore + const referenceText = textAreaRef?.current?.value ?? 
''; + const metricsPromise = []; + if (activeChatmodes != undefined && Object.keys(activeChatmodes).length <= 1) { + setShowMetricsTable(true); + const [defaultMode] = Object.keys(activeChatmodes); + try { + toggleMetricsLoading(); + metricsPromise.push( + getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [defaultMode]) + ); + if (referenceText.trim() != '') { + metricsPromise.push( + getAdditionalMetrics(metricquestion, [metriccontexts], [metricanswer], referenceText, metricmodel, [ + defaultMode, + ]) + ); + } + const metricsResponse = await Promise.allSettled(metricsPromise); + const successresponse = []; + for (let index = 0; index < metricsResponse.length; index++) { + const metricPromise = metricsResponse[index]; + if (metricPromise.status === 'fulfilled' && metricPromise.value.data.status === 'Success') { + successresponse.push(metricPromise.value.data.data); } } - } else { - setShowMultiModeMetrics(true); + setIsAdditionalMetricsWithSingleMode(successresponse.length === 2); toggleMetricsLoading(); - const contextarray = Object.values(activeChatmodes).map((r) => { - return r.metric_contexts; - }); - const answerarray = Object.values(activeChatmodes).map((r) => { - return r.metric_answer; - }); - const modesarray = Object.keys(activeChatmodes).map((mode) => { - return mode; - }); - try { - const responses = await getChatMetrics( - metricquestion, - contextarray as string[], - answerarray as string[], - metricmodel, - modesarray - ); - toggleMetricsLoading(); - if (responses.data.status === 'Success') { - const modewisedata = responses.data.data; - const metricsdata = Object.entries(modewisedata).map(([mode, scores]) => { - return { mode, answer_relevancy: scores.answer_relevancy, faithfulness: scores.faithfulness }; - }); - saveMultimodemetrics(metricsdata); + const mergedState = successresponse.reduce((acc, cur) => { + if (acc[defaultMode]) { + acc[defaultMode] = { ...acc[defaultMode], ...cur[defaultMode] }; } else { - throw new 
Error(responses.data.error); + acc[defaultMode] = cur[defaultMode]; } - } catch (error) { + return acc; + }, {}); + saveMetrics(mergedState[defaultMode]); + } catch (error) { + if (error instanceof Error) { + setShowMetricsTable(false); toggleMetricsLoading(); console.log('Error in getting chat metrics', error); - if (error instanceof Error) { - setMultiModeError(error.message); + saveMetrics({ faithfulness: 0, answer_relevancy: 0, error: error.message }); + } + } + } else if (activeChatmodes != undefined) { + setShowMultiModeMetrics(true); + toggleMetricsLoading(); + const values = Object.values(activeChatmodes); + const keys = Object.keys(activeChatmodes); + const contextarray = values.map((r) => { + return r.metric_contexts; + }); + const answerarray = values.map((r) => { + return r.metric_answer; + }); + const modesarray = keys.map((mode) => { + return mode; + }); + try { + metricsPromise.push( + getChatMetrics(metricquestion, contextarray as string[], answerarray as string[], metricmodel, modesarray) + ); + if (referenceText.trim() != '') { + metricsPromise.push( + getAdditionalMetrics( + metricquestion, + contextarray as string[], + answerarray as string[], + referenceText, + metricmodel, + modesarray + ) + ); + } + const metricsResponse = await Promise.allSettled(metricsPromise); + toggleMetricsLoading(); + const successResponse = []; + for (let index = 0; index < metricsResponse.length; index++) { + const metricPromise = metricsResponse[index]; + if (metricPromise.status === 'fulfilled' && metricPromise.value.data.status === 'Success') { + successResponse.push(metricPromise.value.data.data); } } + setIsAdditionalMetricsEnabled(successResponse.length === 2); + const metricsdata = Object.entries(mergeNestedObjects(successResponse)).map(([mode, scores]) => { + return { mode, ...scores }; + }); + saveMultimodemetrics(metricsdata); + } catch (error) { + setShowMultiModeMetrics(false); + toggleMetricsLoading(); + console.log('Error in getting chat metrics', 
error); + if (error instanceof Error) { + setMultiModeError(error.message); + } } } }; - + const MetricsCheckBoxWithCheck = withVisibility(MetricsCheckbox); + const TextareaWithCheck = withVisibility(() => ( + + )); + const isMultiModes = useMemo( + () => activeChatmodes != null && Object.keys(activeChatmodes).length > 1, + [activeChatmodes] + ); + const isSingleMode = useMemo( + () => activeChatmodes != null && Object.keys(activeChatmodes).length <= 1, + [activeChatmodes] + ); return ( - - +

        +
        - +
        Retrieval information To generate this response, the process took {response_time} seconds, @@ -268,10 +340,12 @@ const ChatInfoModal: React.FC = ({ {' '} mode. - - +
        +
        {error?.length > 0 ? ( - {error} + + {error} + ) : ( {mode === chatModeLables['global search+vector+fulltext'] ? ( @@ -308,7 +382,7 @@ const ChatInfoModal: React.FC = ({ - + {!supportedLLmsForRagas.includes(metricmodel) && ( = ({ . } + usage='inline' > )} @@ -335,41 +410,88 @@ const ChatInfoModal: React.FC = ({ about 20 seconds . You'll see detailed scores shortly. - - - Faithfulness: Determines How accurately the answer reflects the - provided information - - - Answer Relevancy: Determines How well the answer addresses the - user's question. - - +
        + { + setExpandedId(e); + }} + > + + Determines How accurately the answer reflects the provided information. + + + Determines How well the answer addresses the user's question. + + {(isAdditionalMetricsWithSingleMode || isAdditionalMetricsEnabled) && ( + <> + + Determines How much the generated answer matches the reference answer, word-for-word + + + Determines How well the generated answer understands the meaning of the reference answer. + + + Determines Measures the recall of entities present in both reference and retrieved contexts + relative to the reference. + + + )} + +
        - {showMultiModeMetrics && activeChatmodes != null && Object.keys(activeChatmodes).length > 1 && ( + {showMultiModeMetrics && isMultiModes && ( )} - {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length <= 1 && ( + {showMetricsTable && isSingleMode && ( )} - {!metricDetails && activeChatmodes != undefined && Object.keys(activeChatmodes).length <= 1 && ( - - )} - {!multiModelMetrics.length && activeChatmodes != undefined && Object.keys(activeChatmodes).length > 1 && ( + + + + + {isSingleMode && + (isAdditionalMetricsWithSingleMode === false || isAdditionalMetricsWithSingleMode === null) && ( + + )} + {isMultiModes && (isAdditionalMetricsEnabled === false || isAdditionalMetricsEnabled === null) && (
        ); }; export default ChatInfoModal; diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 0a9a81e10..b99f06b96 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -1,33 +1,30 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; -import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; -import CustomMenu from '../UI/Menu'; +import CustomMenu from '../UI/CustomMenu'; import { chatModeLables, chatModes as AvailableModes, chatModeReadableLables } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; +import { useMemo } from 'react'; export default function ChatModeToggle({ menuAnchor, closeHandler = () => {}, open, - anchorPortal = true, - disableBackdrop = false, + isRoot, }: { - menuAnchor: HTMLElement | null; + menuAnchor: React.RefObject; closeHandler?: () => void; open: boolean; - anchorPortal?: boolean; - disableBackdrop?: boolean; + isRoot: boolean; }) { const { setchatModes, chatModes, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); - useEffect(() => { - if (!chatModes.length) { - setchatModes([chatModeLables['graph+vector+fulltext']]); - } - }, [chatModes.length]); + if (!chatModes.length) { + setchatModes([chatModeLables['graph+vector+fulltext']]); + } + const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes @@ -44,7 +41,6 @@ export default function ChatModeToggle({ } else { setchatModes((prev) => [...prev, m.mode]); } - closeHandler(); }; return { title: ( @@ -59,7 +55,10 @@ export default function ChatModeToggle({
    • ), - onClick: handleModeChange, + onClick: (e: React.MouseEvent) => { + handleModeChange(); + e.stopPropagation(); + }, disabledCondition: false, description: ( @@ -72,15 +71,8 @@ export default function ChatModeToggle({ ), }; }); - }, [chatModes, memoizedChatModes, closeHandler]); + }, [chatModes, memoizedChatModes]); return ( - + ); } diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index 4ace49af5..39d47507a 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -23,13 +23,13 @@ export default function ChatModesSwitch({ return ( switchToOtherMode(currentModeIndex - 1)} - aria-label='left' + ariaLabel='left' > - + switchToOtherMode(currentModeIndex + 1)} - aria-label='right' + ariaLabel='right' > - + ); diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 3fa666e1e..5a7bf0121 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -42,22 +42,21 @@ import { downloadClickHandler, getDateTime } from '../../utils/Utils'; import ChatModesSwitch from './ChatModesSwitch'; import CommonActions from './CommonChatActions'; const InfoModal = lazy(() => import('./ChatInfoModal')); +if (typeof window !== 'undefined') { + if (!sessionStorage.getItem('session_id')) { + const id = uuidv4(); + sessionStorage.setItem('session_id', id); + } +} +const sessionId = sessionStorage.getItem('session_id') ?? 
''; const Chatbot: FC = (props) => { - const { - messages: listMessages, - setMessages: setListMessages, - isLoading, - isFullScreen, - clear, - connectionStatus, - } = props; + const { messages: listMessages, setMessages: setListMessages, isLoading, isFullScreen, connectionStatus } = props; const [inputMessage, setInputMessage] = useState(''); const [loading, setLoading] = useState(isLoading); const { userCredentials } = useCredentials(); const { model, chatModes, selectedRows, filesData } = useFileContext(); const messagesEndRef = useRef(null); - const [sessionId, setSessionId] = useState(sessionStorage.getItem('session_id') ?? ''); const [showInfoModal, setShowInfoModal] = useState(false); const [sourcesModal, setSourcesModal] = useState([]); const [modelModal, setModelModal] = useState(''); @@ -123,13 +122,6 @@ const Chatbot: FC = (props) => { const saveCommunities = (chatCommunities: Community[]) => { setCommunities(chatCommunities); }; - useEffect(() => { - if (!sessionStorage.getItem('session_id')) { - const id = uuidv4(); - setSessionId(id); - sessionStorage.setItem('session_id', id); - } - }, []); const simulateTypingEffect = (messageId: number, response: ResponseMode, mode: string, message: string) => { let index = 0; @@ -319,19 +311,9 @@ const Chatbot: FC = (props) => { }; useEffect(() => { scrollToBottom(); - }, [listMessages]); - - useEffect(() => { setLoading(() => listMessages.some((msg) => msg.isLoading || msg.isTyping)); }, [listMessages]); - useEffect(() => { - if (clear) { - cancel(); - setListMessages((msgs) => msgs.map((msg) => ({ ...msg, speaking: false }))); - } - }, [clear]); - const handleCopy = (message: string, id: number) => { copy(message); setListMessages((msgs) => @@ -440,10 +422,10 @@ const Chatbot: FC = (props) => { className='-ml-4' hasStatus name='KM' - shape='square' size='x-large' source={ChatBotAvatar} status={connectionStatus ? 
'online' : 'offline'} + shape='square' type='image' /> ) : ( @@ -451,9 +433,9 @@ const Chatbot: FC = (props) => { className='' hasStatus name='KM' - shape='square' size='x-large' status={connectionStatus ? 'online' : 'offline'} + shape='square' type='image' /> )} @@ -555,12 +537,14 @@ const Chatbot: FC = (props) => { className={`n-bg-palette-neutral-bg-default flex-grow-7 ${ isFullScreen ? 'w-[calc(100%-105px)]' : 'w-[70%]' }`} - aria-label='chatbot-input' - type='text' value={inputMessage} - fluid + isFluid onChange={handleInputChange} - name='chatbot-input' + htmlAttributes={{ + type: 'text', + 'aria-label': 'chatbot-input', + name: 'chatbot-input', + }} /> = (props) => { className: 'n-p-token-4 n-bg-palette-neutral-bg-weak n-rounded-lg', }} onClose={() => setShowInfoModal(false)} - open={showInfoModal} + isOpen={showInfoModal} size={activeChat?.currentMode === chatModeLables['entity search+vector'] ? 'large' : 'medium'} >
      { downloadClickHandler( { @@ -609,19 +596,21 @@ const Chatbot: FC = (props) => { ); }} > - + "" setShowInfoModal(false)} > - +
      = ({ loading, chunks, mode }) => { return ( <> {loading ? ( - +
      - +
      ) : chunks?.length > 0 ? (
        @@ -71,9 +71,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -83,7 +84,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { <>
      - + = ({ loading, chunks, mode }) => { handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -124,8 +126,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -147,8 +150,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -170,8 +174,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -185,7 +190,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { <>
      - + {chunk?.url}
      @@ -197,8 +202,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -229,8 +235,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
      handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -261,4 +268,4 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ); }; -export default ChunkInfo; \ No newline at end of file +export default ChunkInfo; diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 3c6899c5b..30d533193 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -1,4 +1,4 @@ -import { Box, LoadingSpinner, Flex, Typography, TextLink } from '@neo4j-ndl/react'; +import { LoadingSpinner, Flex, Typography, TextLink } from '@neo4j-ndl/react'; import { FC, useState } from 'react'; import ReactMarkdown from 'react-markdown'; import { CommunitiesProps, UserCredentials } from '../../types'; @@ -31,9 +31,9 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = return ( <> {loading ? ( - +
      - +
      ) : communities?.length > 0 ? (
        @@ -43,8 +43,9 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = handleCommunityClick(community.element_id, 'chatInfoView')} + htmlAttributes={{ + onClick: () => handleCommunityClick(community.element_id, 'chatInfoView'), + }} >{`ID : ${community.id}`} {mode === chatModeLables['global search+vector+fulltext'] && community.score && ( @@ -75,4 +76,4 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ); }; -export default CommunitiesInfo; \ No newline at end of file +export default CommunitiesInfo; diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 22eca8a57..0a90862a4 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -1,4 +1,4 @@ -import { Box, GraphLabel, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { GraphLabel, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; import { FC, useMemo, useState } from 'react'; import { EntitiesProps, GroupedEntity, UserCredentials } from '../../types'; import { calcWordColor } from '@neo4j-devtools/word-color'; @@ -59,9 +59,9 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in return ( <> {loading ? ( - +
        - +
        ) : (mode !== 'graph' && Object.keys(groupedEntities)?.length > 0) || (mode == 'graph' && Object.keys(graphonly_entities)?.length > 0) ? (
          @@ -76,7 +76,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in
            {Object.keys(label).map((key) => (
          • - + {key} = ({ loading, mode, graphonly_entities, in key={index} className='flex items-center mb-2 text-ellipsis whitespace-nowrap max-w-[100%)] overflow-hidden' > - + {label === '__Community__' ? graphLabels.community : label} ({labelCounts[label]}) = ({ loading, mode, graphonly_entities, in return ( handleEntityClick(textId!, 'chatInfoView')} + htmlAttributes={{ + onClick: () => handleEntityClick(textId!, 'chatInfoView'), + }} className={loadingGraphView ? 'cursor-wait' : 'cursor-pointer'} > {text} @@ -146,4 +148,4 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ); }; -export default EntitiesInfo; \ No newline at end of file +export default EntitiesInfo; diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx index 44bddc805..b86ea04f3 100644 --- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -1,37 +1,37 @@ import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; import ChatModeToggle from './ChatModeToggle'; -import { Box, IconButton } from '@neo4j-ndl/react'; +import { IconButton } from '@neo4j-ndl/react'; import { IconProps } from '../../types'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; -import { memo, useState } from 'react'; +import { memo, useRef, useState } from 'react'; import { RiChatSettingsLine } from 'react-icons/ri'; const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, deleteOnClick, messages }) => { - const [chatAnchor, setchatAnchor] = useState(null); + const chatAnchor = useRef(null); const [showChatModeOption, setshowChatModeOption] = useState(false); return (
            setshowChatModeOption(false)} - anchorPortal={true} - disableBackdrop={true} open={showChatModeOption} menuAnchor={chatAnchor} + isRoot={false} /> - - { - setchatAnchor(e.currentTarget); - setshowChatModeOption(true); - }} - clean - text='Chat mode' - placement='bottom' - label='Chat mode' - > - - +
            +
            + { + setshowChatModeOption(true); + }} + clean + text='Chat mode' + placement='bottom' + label='Chat mode' + > + + +
            = ({ closeChatBot, delete > - - + + - +
            ); }; diff --git a/frontend/src/components/ChatBot/MetricsCheckbox.tsx b/frontend/src/components/ChatBot/MetricsCheckbox.tsx new file mode 100644 index 000000000..31c1ab7f5 --- /dev/null +++ b/frontend/src/components/ChatBot/MetricsCheckbox.tsx @@ -0,0 +1,18 @@ +import { Checkbox } from '@neo4j-ndl/react'; + +function MetricsCheckbox({ + enableReference, + toggleReferenceVisibility, +}: { + enableReference: boolean; + toggleReferenceVisibility: React.DispatchWithoutAction; +}) { + return ( + + ); +} +export default MetricsCheckbox; diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index 55d37db4c..40e0b11fc 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -1,5 +1,5 @@ import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; -import { memo, useMemo, useRef } from 'react'; +import { memo, useContext, useMemo, useRef } from 'react'; import { useReactTable, getCoreRowModel, @@ -9,6 +9,7 @@ import { getSortedRowModel, } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; function MetricsTab({ metricsLoading, metricDetails, @@ -17,14 +18,14 @@ function MetricsTab({ metricsLoading: boolean; metricDetails: | { - faithfulness: number; - answer_relevancy: number; + [key: string]: number | string; } | undefined; error: string; }) { - const columnHelper = createColumnHelper<{ metric: string; score: number }>(); + const columnHelper = createColumnHelper<{ metric: string; score: number | string }>(); const tableRef = useRef(null); + const { colorMode } = useContext(ThemeWrapperContext); const columns = useMemo( () => [ @@ -47,7 +48,7 @@ function MetricsTab({ header: () => Metric, footer: (info) => info.column.id, }), - columnHelper.accessor((row) => row.score, { + columnHelper.accessor((row) => row.score as number, { 
id: 'Score', cell: (info) => { return {info.getValue().toFixed(2)}; @@ -77,7 +78,9 @@ function MetricsTab({ return ( {error != undefined && error?.trim() != '' ? ( - {error} + + {error} + ) : ( , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( )} diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 4cf4aefd6..6744af10e 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -7,21 +7,24 @@ import { getSortedRowModel, } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; -import { useMemo, useRef } from 'react'; +import { useContext, useEffect, useMemo, useRef } from 'react'; import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; import { multimodelmetric } from '../../types'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; export default function MultiModeMetrics({ data, metricsLoading, error, + isWithAdditionalMetrics, }: { data: multimodelmetric[]; metricsLoading: boolean; error: string; + isWithAdditionalMetrics: boolean | null; }) { + const { colorMode } = useContext(ThemeWrapperContext); const tableRef = useRef(null); - const columnHelper = createColumnHelper(); const columns = useMemo( () => [ @@ -44,20 +47,41 @@ export default function MultiModeMetrics({ header: () => Mode, footer: (info) => info.column.id, }), - columnHelper.accessor((row) => row.answer_relevancy, { - id: 'Answer Relevancy', + columnHelper.accessor((row) => row.answer_relevancy as number, { + id: 'Relevancy', cell: (info) => { return {info.getValue().toFixed(2)}; }, - header: () => Answer Relevancy, + header: () => Relevancy, }), - columnHelper.accessor((row) => row.faithfulness, { + columnHelper.accessor((row) => row.faithfulness as number, { id: 'Score', cell: (info) => { return {info.getValue().toFixed(2)}; }, 
header: () => Faithfulness, }), + columnHelper.accessor((row) => row.context_entity_recall_score as number, { + id: 'Recall Score', + cell: (info) => { + return {info.getValue()?.toFixed(2)}; + }, + header: () => Context Score, + }), + columnHelper.accessor((row) => row.semantic_score as number, { + id: 'Semantic Score', + cell: (info) => { + return {info.getValue()?.toFixed(2)}; + }, + header: () => Semantic Score, + }), + columnHelper.accessor((row) => row.rouge_score as number, { + id: 'Rouge Score', + cell: (info) => { + return {info.getValue()?.toFixed(2)}; + }, + header: () => Rouge Score, + }), ], [] ); @@ -74,42 +98,63 @@ export default function MultiModeMetrics({ enableSorting: true, getSortedRowModel: getSortedRowModel(), }); + useEffect(() => { + if (isWithAdditionalMetrics === false) { + table.setColumnVisibility({ 'Recall Score': false, 'Semantic Score': false, 'Rouge Score': false }); + } else { + table.resetColumnVisibility(true); + } + }, [isWithAdditionalMetrics, table]); + return ( {error?.trim() != '' ? ( - {error} + + {error} + ) : ( - , - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - + ( + - ); - }, - }} - /> + ), + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + isKeyboardNavigable={false} + /> +
      )}
      ); diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index ddfe92a6e..ace69a07f 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -1,6 +1,6 @@ import { FC, useContext } from 'react'; import { Chunk, SourcesProps } from '../../types'; -import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; import { getLogo, isAllowedHost, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; @@ -28,9 +28,9 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { return ( <> {loading ? ( - +
      - +
      ) : mode === 'entity search+vector' && uniqueChunks.length ? (
        {uniqueChunks @@ -65,7 +65,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { {isAllowedHost(link, ['wikipedia.org']) && (
        Wikipedia Logo - + = ({ loading, mode, chunks, sources }) => { <>
        - + = ({ loading, mode, chunks, sources }) => { !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
        - + {link}
        diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index 1a229dc70..c7e990ae7 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -37,4 +37,4 @@ export const handleGraphNodeClick = async ( setLoadingGraphView(false); } } -}; \ No newline at end of file +}; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 8e8516666..3193a69bb 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -60,7 +60,6 @@ const Content: React.FC = ({ setIsSchema, showEnhancementDialog, toggleEnhancementDialog, - closeSettingModal, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); @@ -105,7 +104,6 @@ const Content: React.FC = ({ filesData, setFilesData, setModel, - model, selectedNodes, selectedRels, setSelectedNodes, @@ -117,12 +115,13 @@ const Content: React.FC = ({ setProcessedCount, setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( + 'tableView' + ); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); - const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 
'Min' : 'Sec'}`); @@ -133,11 +132,13 @@ const Content: React.FC = ({ } ); const childRef = useRef(null); - const incrementPage = () => { + const incrementPage = async () => { setCurrentPage((prev) => prev + 1); + await getChunks(documentName, currentPage + 1); }; - const decrementPage = () => { + const decrementPage = async () => { setCurrentPage((prev) => prev - 1); + await getChunks(documentName, currentPage - 1); }; useEffect(() => { if (!init && !searchParams.has('connectURL')) { @@ -165,23 +166,6 @@ const Content: React.FC = ({ setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } }, []); - useEffect(() => { - if (currentPage >= 1) { - (async () => { - await getChunks(documentName, currentPage); - })(); - } - }, [currentPage, documentName]); - useEffect(() => { - setFilesData((prevfiles) => { - return prevfiles.map((curfile) => { - return { - ...curfile, - model: curfile.status === 'New' || curfile.status === 'Reprocess' ? model : curfile.model, - }; - }); - }); - }, [model]); useEffect(() => { if (afterFirstRender) { @@ -272,6 +256,15 @@ const Content: React.FC = ({ if (selectedOption?.value) { setModel(selectedOption?.value); } + setFilesData((prevfiles) => { + return prevfiles.map((curfile) => { + return { + ...curfile, + model: + curfile.status === 'New' || curfile.status === 'Reprocess' ? selectedOption?.value ?? '' : curfile.model, + }; + }); + }); }; const getChunks = async (name: string, pageNo: number) => { toggleChunksLoading(); @@ -814,11 +807,7 @@ const Content: React.FC = ({ > )} {showEnhancementDialog && ( - + )}
        @@ -840,24 +829,28 @@ const Content: React.FC = ({ isGdsActive={isGdsActive} uri={userCredentials && userCredentials?.uri} /> -
        - {!isSchema ? ( - - ) : selectedNodes.length || selectedRels.length ? ( - - ) : ( - - )} - {isSchema ? ( - - {(!selectedNodes.length || !selectedNodes.length) && 'Empty'} Graph Schema configured - {selectedNodes.length || selectedRels.length - ? `(${selectedNodes.length} Labels + ${selectedRels.length} Rel Types)` - : ''} - - ) : ( - No Graph Schema configured - )} +
        +
        + {!isSchema ? ( + + ) : selectedNodes.length || selectedRels.length ? ( + + ) : ( + + )} +
        +
        + {isSchema ? ( + + {(!selectedNodes.length || !selectedNodes.length) && 'Empty'} Graph Schema configured + {selectedNodes.length || selectedRels.length + ? `(${selectedNodes.length} Labels + ${selectedRels.length} Rel Types)` + : ''} + + ) : ( + No Graph Schema configured + )} +
        @@ -909,7 +902,7 @@ const Content: React.FC = ({ setTotalPageCount(null); } setCurrentPage(1); - // await getChunks(name, 1); + await getChunks(name, 1); } }} ref={childRef} @@ -993,4 +986,4 @@ const Content: React.FC = ({ ); }; -export default Content; \ No newline at end of file +export default Content; diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index c23b94a3b..51144c930 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -153,55 +153,61 @@ const S3Modal: React.FC = ({ hideModal, open }) => {
        setValid(validation(bucketUrl) && isFocused), + onKeyDown: handleKeyDown, + 'aria-label': 'Bucket URL', + placeholder: 's3://data.neo4j.com/pdf/', + }} value={bucketUrl} - disabled={false} + isDisabled={false} label='Bucket URL' - aria-label='Bucket URL' - placeholder='s3://data.neo4j.com/pdf/' - autoFocus - fluid - required + isFluid={true} + isRequired={true} errorText={!isValid && isFocused && 'Please Fill The Valid URL'} - onBlur={() => setValid(validation(bucketUrl) && isFocused)} onChange={(e) => { setisFocused(true); setBucketUrl(e.target.value); }} - onKeyDown={handleKeyDown} />
        { setAccessKey(e.target.value); }} - onKeyDown={handleKeyDown} /> { setSecretKey(e.target.value); }} - onKeyDown={handleKeyDown} />
        diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index a68faeb97..8893c4f70 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -1,5 +1,5 @@ import { TextInput } from '@neo4j-ndl/react'; -import { useCallback, useEffect, useState } from 'react'; +import { useCallback, useState } from 'react'; import { useCredentials } from '../../../context/UserCredentials'; import { useFileContext } from '../../../context/UsersFiles'; import { urlScanAPI } from '../../../services/URLScan'; @@ -41,17 +41,6 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => setprojectId(''); }; - useEffect(() => { - if (status != 'unknown') { - setTimeout(() => { - setStatusMessage(''); - setStatus('unknown'); - reset(); - hideModal(); - }, 5000); - } - }, []); - const googleLogin = useGoogleLogin({ onSuccess: async (codeResponse) => { try { @@ -195,48 +184,55 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) =>
        { setprojectId(e.target.value); }} - onKeyDown={handleKeyPress} > { setbucketName(e.target.value); }} - onKeyDown={handleKeyPress} /> { setFolderName(e.target.value); }} - onKeyDown={handleKeyPress} />
        diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index bddbb65f0..38a6dd1d9 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -1,5 +1,5 @@ import { Dropzone, Flex, Typography } from '@neo4j-ndl/react'; -import { useState, useEffect, FunctionComponent } from 'react'; +import { useState, FunctionComponent, useEffect } from 'react'; import Loader from '../../../utils/Loader'; import { v4 as uuidv4 } from 'uuid'; import { useCredentials } from '../../../context/UserCredentials'; @@ -68,7 +68,6 @@ const DropZone: FunctionComponent = () => { setFilesData(copiedFilesData); } }; - useEffect(() => { if (selectedFiles.length > 0) { for (let index = 0; index < selectedFiles.length; index++) { @@ -79,6 +78,7 @@ const DropZone: FunctionComponent = () => { } } }, [selectedFiles]); + const uploadFileInChunks = (file: File) => { const totalChunks = Math.ceil(file.size / chunkSize); const chunkProgressIncrement = 100 / totalChunks; @@ -127,7 +127,7 @@ const DropZone: FunctionComponent = () => { if (curfile.name == file.name) { return { ...curfile, - uploadprogess: chunkNumber * chunkProgressIncrement, + uploadProgress: chunkNumber * chunkProgressIncrement, }; } return curfile; @@ -139,7 +139,7 @@ const DropZone: FunctionComponent = () => { if (curfile.name == file.name) { return { ...curfile, - uploadprogess: chunkNumber * chunkProgressIncrement, + uploadProgress: chunkNumber * chunkProgressIncrement, }; } return curfile; @@ -179,7 +179,7 @@ const DropZone: FunctionComponent = () => { return { ...curfile, status: 'New', - uploadprogess: 100, + uploadProgress: 100, }; } return curfile; diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index d7fb1e56d..1b27f4eb7 100644 --- 
a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -15,18 +15,7 @@ export default function DropZoneForSmallLayouts() { const [isLoading, setIsLoading] = useState(false); const [isClicked, setIsClicked] = useState(false); const { userCredentials } = useCredentials(); - const [selectedFiles, setSelectedFiles] = useState([]); - useEffect(() => { - if (selectedFiles.length > 0) { - for (let index = 0; index < selectedFiles.length; index++) { - const file = selectedFiles[index]; - if (filesData[index]?.status == 'None' && isClicked) { - uploadFileInChunks(file); - } - } - } - }, [selectedFiles]); const uploadFileInChunks = (file: File) => { const totalChunks = Math.ceil(file.size / chunkSize); @@ -214,11 +203,21 @@ export default function DropZoneForSmallLayouts() { setFilesData(copiedFilesData); } }; + useEffect(() => { + if (selectedFiles.length > 0) { + for (let index = 0; index < selectedFiles.length; index++) { + const file = selectedFiles[index]; + if (filesData[index]?.status == 'None' && isClicked) { + uploadFileInChunks(file); + } + } + } + }, [selectedFiles]); return ( <>
        - {isLoading ? : } + {isLoading ? : }
        ); diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 3aae5aa6f..a3ebd0d5a 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,4 +1,4 @@ -import { Dropdown, Tip, useMediaQuery } from '@neo4j-ndl/react'; +import { Tooltip, useMediaQuery, Select } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; import { memo, useMemo } from 'react'; import { capitalize, capitalizeWithUnderscore } from '../utils/Utils'; @@ -22,9 +22,8 @@ const DropdownComponent: React.FC = ({ return ( <>
        - LLM Model used for Extraction & Chat
        } selectProps={{ @@ -36,12 +35,12 @@ const DropdownComponent: React.FC = ({ const isModelSupported = !isProdEnv || prodllms?.includes(value); return { label: !isModelSupported ? ( - - + + {label} - - Available In Development Version - + + Available In Development Version + ) : ( {label} ), @@ -58,7 +57,10 @@ const DropdownComponent: React.FC = ({ value: value, }} size='medium' - fluid + isFluid + htmlAttributes={{ + 'aria-label': 'A selection dropdown', + }} /> {children}
        diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 42b82572a..ddbaebbac 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -103,15 +103,15 @@ const FileTable = forwardRef((props, ref) => { .includes('Processing'); return ( ); }, @@ -161,11 +161,13 @@ const FileTable = forwardRef((props, ref) => { if (info.getValue() != 'Processing') { return (
        - - {info.getValue()} +
        + +
        +
        {info.getValue()}
        {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && !isReadOnlyUser && ( @@ -177,7 +179,7 @@ const FileTable = forwardRef((props, ref) => { clean onClick={() => onRetry(info?.row?.id as string)} > - + )} @@ -185,16 +187,22 @@ const FileTable = forwardRef((props, ref) => { ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { return ( -
        - - Processing +
        +
        + +
        +
        + Processing +
        { cancelHandler( info.row.original.name as string, @@ -223,10 +231,12 @@ const FileTable = forwardRef((props, ref) => {
        { cancelHandler( info.row.original.name as string, @@ -242,9 +252,11 @@ const FileTable = forwardRef((props, ref) => { ); } return ( -
        - - {info.getValue()} +
        +
        + +
        +
        {info.getValue()}
        ); }, @@ -304,7 +316,7 @@ const FileTable = forwardRef((props, ref) => { }, }, ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -313,26 +325,32 @@ const FileTable = forwardRef((props, ref) => { cell: (info: CellContext) => { if (parseInt(info.getValue()) === 100 || info.row.original?.status === 'New') { return ( - - - Uploaded - +
        + + + + Uploaded +
        ); } else if (info.row.original?.status === 'Uploading') { return ; } else if (info.row.original?.status === 'Failed') { return ( - - - NA - +
        + + + + NA +
        ); } return ( - - - Uploaded - +
        + + + + Uploaded +
        ); }, header: () => Upload Status, @@ -355,7 +373,7 @@ const FileTable = forwardRef((props, ref) => { return ( - + {info.row.original.fileSource} @@ -400,7 +418,7 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -443,7 +461,7 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -484,7 +502,7 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -513,7 +531,7 @@ const FileTable = forwardRef((props, ref) => { clean onClick={() => onInspect(info?.row?.original?.name as string)} > - + ((props, ref) => { label='chunktextaction' text='View Chunks' size='large' - disabled={info.getValue() === 'Uploading'} + disabled={info.getValue() === 'Uploading' || info.getValue() === 'New'} > - + ), - size: 300, + maxSize: 300, minSize: 180, header: () => Actions, footer: (info) => info.column.id, @@ -927,7 +945,7 @@ const FileTable = forwardRef((props, ref) => { tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} isLoading={isLoading} @@ -935,7 +953,13 @@ const FileTable = forwardRef((props, ref) => { className: classNameCheck, }} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ((props, ref) => { ); }, }} + isKeyboardNavigable={false} />
        @@ -962,4 +987,4 @@ const FileTable = forwardRef((props, ref) => { ); }); -export default FileTable; \ No newline at end of file +export default FileTable; diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index b8caf7484..220b138f7 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -15,25 +15,25 @@ const CheckboxSelection: React.FC = ({
        {isDocChunk && ( handleChange('DocumentChunk')} /> )} {isEntity && ( handleChange('Entities')} /> )} {isCommunity && ( handleChange('Communities')} /> )} diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index fa270455b..753c200fb 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -16,7 +16,9 @@ const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps
        {key} diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index ac35a93ae..4fe1f52f5 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,4 +1,4 @@ -import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, useDebounce } from '@neo4j-ndl/react'; +import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, useDebounceValue } from '@neo4j-ndl/react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { BasicNode, @@ -53,7 +53,7 @@ const GraphViewModal: React.FunctionComponent = ({ const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); - const debouncedQuery = useDebounce(searchQuery, 300); + const [debouncedQuery] = useDebounceValue(searchQuery, 300); const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); @@ -339,12 +339,14 @@ const GraphViewModal: React.FunctionComponent = ({ id: 'default-menu', }} size='unset' - open={open} - aria-labelledby='form-dialog-title' - disableCloseButton={false} + isOpen={open} + hasDisabledCloseButton={false} onClose={onClose} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - + {headerTitle} {checkBoxView && ( @@ -365,15 +367,15 @@ const GraphViewModal: React.FunctionComponent = ({
        ) : status !== 'unknown' ? (
        - +
        ) : nodes.length === 0 && relationships.length === 0 && graphType.length !== 0 ? (
        - +
        ) : graphType.length === 0 && checkBoxView ? (
        - +
        ) : ( <> @@ -390,7 +392,7 @@ const GraphViewModal: React.FunctionComponent = ({ }} nvlCallbacks={nvlCallbacks} /> - + {viewPoint !== 'chatInfoView' && ( = ({ placement='left' disabled={disableRefresh} > - + )} - + - + = ({ onClick={handleZoomToFit} placement='left' > - +
        @@ -445,4 +447,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; \ No newline at end of file +export default GraphViewModal; diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx index 142c9fabc..6c3de167a 100644 --- a/frontend/src/components/Graph/ResultOverview.tsx +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -114,17 +114,25 @@ const ResultOverview: React.FunctionComponent = ({ {graphLabels.resultOverview}
        { setSearchQuery(e.target.value); }} - placeholder='Search On Node Properties' - fluid={true} - leftIcon={ - - + isFluid={true} + leftElement={ + + } /> diff --git a/frontend/src/components/Layout/DrawerChatbot.tsx b/frontend/src/components/Layout/DrawerChatbot.tsx index 3150e96a5..074c4ff78 100644 --- a/frontend/src/components/Layout/DrawerChatbot.tsx +++ b/frontend/src/components/Layout/DrawerChatbot.tsx @@ -11,7 +11,7 @@ const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryD }; return (
        - + = ({ return (
        - + {!isReadOnlyUser ? ( - + {alertState.showAlert && ( = ({ }`} > {process.env.VITE_ENV != 'PROD' && ( - + {!isBackendConnected ? : } @@ -211,7 +211,7 @@ const DrawerDropzone: React.FC = ({
        ) : ( - + This user account does not have permission to access or manage data sources. diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 752c31660..a47cbf505 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -39,7 +39,7 @@ function Header() { aria-label='main navigation' >
        - + void; - openSettingsDialog: () => void; -}) { +export default function PageLayout() { const largedesktops = useMediaQuery(`(min-width:1440px )`); const { userCredentials, connectionStatus } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); @@ -46,18 +38,34 @@ export default function PageLayoutNew({ } }; - const { messages, setClearHistoryData, clearHistoryData } = useMessageContext(); + const { messages, setClearHistoryData, clearHistoryData, setMessages } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); - + const { cancel } = useSpeechSynthesis(); const deleteOnClick = async () => { try { setClearHistoryData(true); + cancel(); const response = await clearChatAPI( userCredentials as UserCredentials, sessionStorage.getItem('session_id') ?? '' ); if (response.data.status === 'Success') { - setClearHistoryData(false); + const date = new Date(); + + setMessages([ + { + datetime: `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, + id: 2, + modes: { + 'graph+vector+fulltext': { + message: + ' Welcome to the Neo4j Knowledge Graph Chat. 
You can ask questions related to documents which have been completely processed.', + }, + }, + user: 'chatbot', + currentMode: 'graph+vector+fulltext', + }, + ]); } } catch (error) { console.log(error); @@ -86,31 +94,17 @@ export default function PageLayoutNew({ /> { setShowTextFromSchemaDialog({ triggeredFrom: '', show: false }); switch (showTextFromSchemaDialog.triggeredFrom) { case 'enhancementtab': toggleEnhancementDialog(); break; - case 'schemadialog': - openSettingsDialog(); - break; default: break; } }} > - { - setShowTextFromSchemaDialog({ triggeredFrom: 'schemadialog', show: true }); - }} - open={isSettingPanelExpanded} - onClose={closeSettingModal} - settingView='headerView' - isSchema={isSchema} - setIsSchema={setIsSchema} - /> setShowChatBot(true)} isLeftExpanded={isLeftExpanded} @@ -123,7 +117,6 @@ export default function PageLayoutNew({ setIsSchema={setIsSchema} showEnhancementDialog={showEnhancementDialog} toggleEnhancementDialog={toggleEnhancementDialog} - closeSettingModal={closeSettingModal} /> {showDrawerChatbot && ( = ({ position, @@ -43,31 +44,11 @@ const SideNav: React.FC = ({ const [isChatModalOpen, setIsChatModalOpen] = useState(false); const [isFullScreen, setIsFullScreen] = useState(false); const { setMessages } = useMessageContext(); - const [chatModeAnchor, setchatModeAnchor] = useState(null); const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); const { connectionStatus, isReadOnlyUser } = useCredentials(); const downloadLinkRef = useRef(null); - - const date = new Date(); - useEffect(() => { - if (clearHistoryData) { - setMessages([ - { - datetime: `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, - id: 2, - modes: { - 'graph+vector+fulltext': { - message: - ' Welcome to the Neo4j Knowledge Graph Chat. 
You can ask questions related to documents which have been completely processed.', - }, - }, - user: 'chatbot', - currentMode: 'graph+vector+fulltext', - }, - ]); - } - }, [clearHistoryData]); + const anchorMenuRef = useRef(null); const handleExpandClick = () => { setIsChatModalOpen(true); @@ -98,32 +79,38 @@ const SideNav: React.FC = ({ return (
        - + {isExpanded && largedesktops && ( : } + htmlAttributes={{ onClick: handleClick }} + icon={ + position === 'left' ? ( + + ) : ( + + ) + } /> )} {!isExpanded && position === 'left' && largedesktops && ( - - + + + } /> )} {position === 'right' && !isExpanded && ( - - + + + } /> )} @@ -131,108 +118,117 @@ const SideNav: React.FC = ({ {!largedesktops && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('gcs') && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && !isReadOnlyUser && ( + - + } > )} {position === 'right' && isExpanded && ( <> - + - - - - {tooltips.clearChat} + + + + {tooltips.clearChat} } /> - - + + - + - - {tooltips.maximise} + + {tooltips.maximise} } /> - - + + { - downloadClickHandler( - { conversation: messages }, - downloadLinkRef, - 'graph-builder-conversation.json' - ); + htmlAttributes={{ + onClick: () => { + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + ); + }, }} icon={ <> - + - - + + Download Conversation "" - + } /> - + {!isChatModalOpen && ( { - setchatModeAnchor(e.currentTarget); - setshowChatMode(true); - }} + ref={anchorMenuRef} icon={ <> - + { + setshowChatMode(true); + }} + size='small' + placement='left' + clean + label='Chat mode' + text='Chat mode' + > setshowChatMode(false)} - menuAnchor={chatModeAnchor} - disableBackdrop={true} - anchorPortal={true} + closeHandler={() => { + setshowChatMode(false); + }} + menuAnchor={anchorMenuRef} + isRoot={false} > } @@ -249,11 +245,11 @@ const SideNav: React.FC = ({ id: 'Chatbot-popup', className: 'n-p-token-4 n-rounded-lg h-[90%]', }} - open={isChatModalOpen} + isOpen={isChatModalOpen} size='unset' - disableCloseButton={true} + hasDisabledCloseButton={true} > - + { + const { breakpoints } = 
tokens; + const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const sortedChunksData = useMemo(() => { return chunks.sort((a, b) => a.position - b.position); }, [chunks]); + return ( - - Text Chunks + + +
        + +
        + Text Chunks + + These text chunks are extracted to build a knowledge graph and enable accurate information retrieval using + a different retrival strategies + +
        +
        + {!chunksLoading && totalPageCount != null && totalPageCount > 0 && ( +
        + Total Pages: {totalPageCount} +
        + )} +
        {chunksLoading ? ( ) : ( -
          +
            {sortedChunksData.map((c, idx) => ( -
          1. - +
          2. + Position : {c.position} @@ -57,10 +82,10 @@ const ChunkPopUp = ({ {totalPageCount != null && totalPageCount > 1 && ( - + - + diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 76df732a4..934a38f76 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -1,4 +1,4 @@ -import { Button, Dialog, TextInput, Dropdown, Banner, Dropzone, Typography, TextLink, Flex } from '@neo4j-ndl/react'; +import { Button, Dialog, TextInput, Select, Banner, Dropzone, Typography, TextLink, Flex } from '@neo4j-ndl/react'; import React, { useCallback, useEffect, useMemo, useState, useRef } from 'react'; import connectAPI from '../../../services/ConnectAPI'; import { useCredentials } from '../../../context/UserCredentials'; @@ -311,18 +311,20 @@ export default function ConnectionModal({ <> { setOpenConnection((prev) => ({ ...prev, openPopUp: false })); setMessage({ type: 'unknown', content: '' }); }} - disableCloseButton={vectorIndexLoading} + hasDisabledCloseButton={vectorIndexLoading} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - Connect to Neo4j + Connect to Neo4j - + Don't have a Neo4j instance? Start for free today @@ -330,17 +332,19 @@ export default function ConnectionModal({ (vectorIndexLoading ? ( ) : ( ))}
            @@ -363,87 +367,97 @@ export default function ConnectionModal({ />
            - newValue && setProtocol(newValue.value), options: protocols.map((option) => ({ label: option, value: option })), value: { label: protocol, value: protocol }, }} className='w-1/4 inline-block' - fluid + isFluid + htmlAttributes={{ + id: 'protocol', + }} />
            handleHostPasteChange(e), + onKeyDown: (e) => handleKeyPress(e, databaseRef), + 'aria-label': 'Connection URI', + }} value={URI} - disabled={false} + isDisabled={false} label='URI' - autoFocus - fluid + isFluid={true} onChange={(e) => setURI(e.target.value)} - onPaste={(e) => handleHostPasteChange(e)} - aria-label='Connection URI' - onKeyDown={(e) => handleKeyPress(e, databaseRef)} />
            setDatabase(e.target.value)} className='w-full' - onKeyDown={handleKeyPress} />
            setUsername(e.target.value)} - onKeyDown={handleKeyPress} />
            setPassword(e.target.value)} - onKeyDown={handleKeyPress} />
            ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx index 3c2965f44..79c472aeb 100644 --- a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx +++ b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx @@ -33,7 +33,7 @@ To proceed, please choose one of the following options: : ''} - +
            {isVectorIndexAlreadyExists ? 'Re-Create Vector Index' : 'Create Vector Index'} - +
            ); } diff --git a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx index cbe69c492..e3604e70c 100644 --- a/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx +++ b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx @@ -25,14 +25,14 @@ function DeletePopUp({ no_of_files > 1 ? 'Nodes' : 'Node' } from the graph database? `; return ( - +
            {message}
            {view === 'contentView' && (
            { if (e.target.checked) { setDeleteEntities(true); @@ -45,10 +45,10 @@ function DeletePopUp({ )} - - diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 330f5f657..d59b46e21 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react'; import { getDuplicateNodes } from '../../../../services/GetDuplicateNodes'; import { useCredentials } from '../../../../context/UserCredentials'; import { dupNodes, selectedDuplicateNodes, UserCredentials } from '../../../../types'; @@ -30,6 +30,7 @@ import mergeDuplicateNodes from '../../../../services/MergeDuplicateEntities'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; +import { ThemeWrapperContext } from '../../../../context/ThemeWrapper'; export default function DeduplicationTab() { const { breakpoints } = tokens; @@ -46,6 +47,8 @@ export default function DeduplicationTab() { const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); const [nodesCount, setNodesCount] = useState(0); + const { colorMode } = useContext(ThemeWrapperContext); + const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -66,10 +69,13 @@ export default function DeduplicationTab() { console.log(error); } }, [userCredentials]); + useEffect(() => { - (async () => { - await fetchDuplicateNodes(); - })(); + if (userCredentials != null) { + (async () => { + await fetchDuplicateNodes(); + })(); + } }, [userCredentials]); const clickHandler = async 
() => { @@ -127,8 +133,8 @@ export default function DeduplicationTab() { header: ({ table }: { table: Table }) => { return ( ); @@ -137,10 +143,10 @@ export default function DeduplicationTab() { return (
            ); @@ -154,8 +160,10 @@ export default function DeduplicationTab() {
            handleDuplicateNodeClick(info.row.id, 'chatInfoView')} - title={info.getValue()} + htmlAttributes={{ + onClick: () => handleDuplicateNodeClick(info.row.id, 'chatInfoView'), + title: info.getValue(), + }} > {info.getValue()} @@ -179,7 +187,7 @@ export default function DeduplicationTab() { onRemove={() => { onRemove(info.row.original.e.elementId, s.elementId); }} - removeable={true} + isRemovable={true} type='default' size={isTablet ? 'small' : 'medium'} > @@ -286,7 +294,7 @@ export default function DeduplicationTab() { tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} rootProps={{ @@ -294,7 +302,13 @@ export default function DeduplicationTab() { }} isLoading={isLoading} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 6d4daae10..1f6bdb347 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -1,5 +1,5 @@ import { Checkbox, DataGrid, DataGridComponents, Flex, TextLink, Typography, useMediaQuery } from '@neo4j-ndl/react'; -import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react'; import { UserCredentials, orphanNodeProps } from '../../../../types'; import { getOrphanNodes } from '../../../../services/GetOrphanNodes'; import { useCredentials } from '../../../../context/UserCredentials'; @@ -21,6 +21,7 @@ import DeletePopUp from '../../DeletePopUp/DeletePopUp'; import { tokens } from '@neo4j-ndl/base'; import 
GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; +import { ThemeWrapperContext } from '../../../../context/ThemeWrapper'; export default function DeletePopUpForOrphanNodes({ deleteHandler, loading, @@ -41,6 +42,7 @@ export default function DeletePopUpForOrphanNodes({ const [neoRels, setNeoRels] = useState([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); + const { colorMode } = useContext(ThemeWrapperContext); const fetchOrphanNodes = useCallback(async () => { try { @@ -62,9 +64,11 @@ export default function DeletePopUpForOrphanNodes({ }, [userCredentials]); useEffect(() => { - (async () => { - await fetchOrphanNodes(); - })(); + if (userCredentials != null) { + (async () => { + await fetchOrphanNodes(); + })(); + } return () => { setOrphanNodes([]); setTotalOrphanNodes(0); @@ -91,8 +95,8 @@ export default function DeletePopUpForOrphanNodes({ header: ({ table }: { table: Table }) => { return ( ); @@ -101,10 +105,10 @@ export default function DeletePopUpForOrphanNodes({ return (
            ); @@ -118,8 +122,10 @@ export default function DeletePopUpForOrphanNodes({
            handleOrphanNodeClick(info.row.id, 'chatInfoView')} - title={info.getValue()} + htmlAttributes={{ + onClick: () => handleOrphanNodeClick(info.row.id, 'chatInfoView'), + title: info.getValue(), + }} > {info.getValue()} @@ -254,7 +260,7 @@ export default function DeletePopUpForOrphanNodes({ tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} rootProps={{ @@ -262,7 +268,13 @@ export default function DeletePopUpForOrphanNodes({ }} isLoading={isLoading} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index 3ab5b48bd..9743e5b14 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -1,13 +1,12 @@ -import { MouseEventHandler, useCallback, useEffect, useState } from 'react'; +import { MouseEventHandler, useCallback, useEffect, useMemo, useState } from 'react'; import ButtonWithToolTip from '../../../UI/ButtonWithToolTip'; -import { appLabels, buttonCaptions, tooltips } from '../../../../utils/Constants'; -import { Dropdown, Flex, Typography, useMediaQuery } from '@neo4j-ndl/react'; +import { appLabels, buttonCaptions, getDefaultSchemaExamples, tooltips } from '../../../../utils/Constants'; +import { Select, Flex, Typography, useMediaQuery } from '@neo4j-ndl/react'; import { useCredentials } from '../../../../context/UserCredentials'; import { useFileContext } from '../../../../context/UsersFiles'; import { OnChangeValue, ActionMeta } from 'react-select'; import { OptionType, 
schema, UserCredentials } from '../../../../types'; import { getNodeLabelsAndRelTypes } from '../../../../services/GetNodeLabelsRelTypes'; -import schemaExamples from '../../../../assets/schemas.json'; import { tokens } from '@neo4j-ndl/base'; import { showNormalToast } from '../../../../utils/toasts'; @@ -149,22 +148,8 @@ export default function EntityExtractionSetting({ }; const [nodeLabelOptions, setnodeLabelOptions] = useState([]); const [relationshipTypeOptions, setrelationshipTypeOptions] = useState([]); - const [defaultExamples, setdefaultExamples] = useState([]); + const defaultExamples = useMemo(() => getDefaultSchemaExamples(), []); - useEffect(() => { - const parsedData = schemaExamples.reduce((accu: OptionType[], example) => { - const examplevalues: OptionType = { - label: example.schema, - value: JSON.stringify({ - nodelabels: example.labels, - relationshipTypes: example.relationshipTypes, - }), - }; - accu.push(examplevalues); - return accu; - }, []); - setdefaultExamples(parsedData); - }, []); useEffect(() => { if (userCredentials) { if (open && view === 'Dialog') { @@ -265,15 +250,6 @@ export default function EntityExtractionSetting({ ); }; - // Load selectedSchemas from local storage on mount - useEffect(() => { - const storedSchemas = localStorage.getItem('selectedSchemas'); - if (storedSchemas) { - const parsedSchemas = JSON.parse(storedSchemas); - setSelectedSchemas(parsedSchemas.selectedOptions); - } - }, []); - return (
            @@ -290,7 +266,7 @@ export default function EntityExtractionSetting({
            {appLabels.predefinedSchema}
            -
            {appLabels.ownSchema}
            - - } - checked={true} - disabled={true} - aria-label='Selected-postprocessing-jobs' + isChecked={true} + isDisabled={true} + ariaLabel='Selected-postprocessing-jobs' /> ); })} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index b48b6c6a6..d5ebc04a2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -32,7 +32,7 @@ export default function PostProcessingCheckList() { .join(' ')} } - checked={ + isChecked={ isCreateCommunities ? isGdsActive && postProcessingTasks.includes(job.title) : postProcessingTasks.includes(job.title) @@ -44,8 +44,8 @@ export default function PostProcessingCheckList() { setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); } }} - disabled={isCreateCommunities && !isGdsActive} - aria-label='checkbox-postProcessing' + isDisabled={isCreateCommunities && !isGdsActive} + ariaLabel='checkbox-postProcessing' /> {job.description}
            diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index c7b621748..4226099f9 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -1,6 +1,6 @@ -import { Dialog, Tabs, Box, Typography, Flex, useMediaQuery } from '@neo4j-ndl/react'; +import { Dialog, Tabs, Typography, Flex, useMediaQuery } from '@neo4j-ndl/react'; import graphenhancement from '../../../assets/images/graph-enhancements.svg'; -import { useEffect, useState } from 'react'; +import { useState } from 'react'; import DeletePopUpForOrphanNodes from './DeleteTabForOrphanNodes'; import deleteOrphanAPI from '../../../services/DeleteOrphanNodes'; import { UserCredentials } from '../../../types'; @@ -11,15 +11,7 @@ import DeduplicationTab from './Deduplication'; import { tokens } from '@neo4j-ndl/base'; import PostProcessingCheckList from './PostProcessingCheckList'; -export default function GraphEnhancementDialog({ - open, - onClose, - closeSettingModal, -}: { - open: boolean; - onClose: () => void; - closeSettingModal: () => void; -}) { +export default function GraphEnhancementDialog({ open, onClose }: { open: boolean; onClose: () => void }) { const { breakpoints } = tokens; const [orphanDeleteAPIloading, setorphanDeleteAPIloading] = useState(false); const { setShowTextFromSchemaDialog } = useFileContext(); @@ -36,9 +28,6 @@ export default function GraphEnhancementDialog({ console.log(error); } }; - useEffect(() => { - closeSettingModal(); - }, []); const [activeTab, setactiveTab] = useState(0); return ( @@ -47,14 +36,14 @@ export default function GraphEnhancementDialog({ id: 'graph-enhancement-popup', className: 'n-p-token-4 n-rounded-lg', }} - open={open} + isOpen={open} size='unset' - disableCloseButton={false} + hasDisabledCloseButton={false} onClose={onClose} > - - +
            +
            - +
            Graph Enhancements {isTablet @@ -76,23 +65,43 @@ export default function GraphEnhancementDialog({ - + Entity Extraction Settings - + Disconnected Nodes - + De-Duplication Of Nodes - + Post Processing Jobs - - - +
            +
            +
            diff --git a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx index 08991c44b..c0159be65 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx @@ -70,12 +70,14 @@ function ConfirmationDialog({ return ( { setChecked([]); onClose(); }} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > {largeFiles.length === 0 && loading ? ( diff --git a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index cc684156b..b8dfa0251 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -1,4 +1,4 @@ -import { Box, Checkbox, Flex, Typography } from '@neo4j-ndl/react'; +import { Checkbox, Flex, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; import { LargefilesProps } from '../../../types'; import { List, ListItem, ListItemAvatar, ListItemButton, ListItemIcon, ListItemText } from '@mui/material'; @@ -30,16 +30,16 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke [colorMode] ); return ( - - +
            +
            alert icon - +
            Large Document Notice - + One or more of your selected documents are large and may take extra time to process. Please review the estimated times below @@ -51,7 +51,7 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke { if (e.target.checked) { handleToggle(true, f.id); @@ -59,8 +59,8 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke handleToggle(false, f.id); } }} - checked={checked.indexOf(f.id) !== -1} - tabIndex={-1} + isChecked={checked.indexOf(f.id) !== -1} + htmlAttributes={{ tabIndex: -1 }} /> @@ -96,9 +96,9 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke ); })} - - - +
            +
            +
            ); }; export default LargeFilesAlert; diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index 6ebb02e33..5112ca688 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -27,11 +27,11 @@ function RetryConfirmationDialog({ const file = filesData.find((c) => c.id === fileId); const RetryOptionsForFile = file?.status != 'Completed' ? RETRY_OPIONS : RETRY_OPIONS.slice(0, 2); return ( - + Reprocess Options {alertStatus.showAlert && ( - + {alertStatus.alertMessage} )} @@ -47,17 +47,19 @@ function RetryConfirmationDialog({ }); }); }} - name='retryoptions' - checked={o === file?.retryOption && file?.retryOptionStatus} + htmlAttributes={{ + name: 'retryoptions', + onKeyDown: (e) => { + if (e.code === 'Enter' && file?.retryOption.length) { + retryHandler(file?.name as string, file?.retryOption as string); + } + }, + }} + isChecked={o === file?.retryOption && file?.retryOptionStatus} label={o .split('_') .map((s) => capitalize(s)) .join(' ')} - onKeyDown={(e) => { - if (e.code === 'Enter' && file?.retryOption.length) { - retryHandler(file?.name as string, file?.retryOption as string); - } - }} /> ); })} diff --git a/frontend/src/components/Popups/Settings/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx index 1b9136198..f59ec86bf 100644 --- a/frontend/src/components/Popups/Settings/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -1,4 +1,4 @@ -import { Checkbox, Dialog, Textarea } from '@neo4j-ndl/react'; +import { Checkbox, Dialog, TextArea } from '@neo4j-ndl/react'; import { useCallback, useState } from 'react'; import { getNodeLabelsAndRelTypesFromText } from '../../../services/SchemaFromTextAPI'; import { useCredentials } from '../../../context/UserCredentials'; @@ -7,15 +7,7 @@ import { buttonCaptions } from 
'../../../utils/Constants'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; import { showNormalToast, showSuccessToast } from '../../../utils/toasts'; -const SchemaFromTextDialog = ({ - open, - onClose, - openSettingsDialog, -}: { - open: boolean; - onClose: () => void; - openSettingsDialog: () => void; -}) => { +const SchemaFromTextDialog = ({ open, onClose }: { open: boolean; onClose: () => void }) => { const [userText, setUserText] = useState(''); const [loading, setloading] = useState(false); const { setSelectedNodes, setSelectedRels } = useFileContext(); @@ -84,7 +76,6 @@ const SchemaFromTextDialog = ({ onClose(); setUserText(''); setIsSchema(false); - openSettingsDialog(); } catch (error) { setloading(false); console.log(error); @@ -94,26 +85,30 @@ const SchemaFromTextDialog = ({ return ( { setloading(false); setIsSchema(false); setUserText(''); onClose(); }} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - Entity Graph Extraction Settings + Entity Graph Extraction Settings - + )); const isMultiModes = useMemo( () => activeChatmodes != null && Object.keys(activeChatmodes).length > 1, @@ -320,7 +318,7 @@ const ChatInfoModal: React.FC = ({ [activeChatmodes] ); return ( -
            +
            = ({ - + {!supportedLLmsForRagas.includes(metricmodel) && ( = ({ about 20 seconds . You'll see detailed scores shortly. -
            - { - setExpandedId(e); - }} - > - - Determines How accurately the answer reflects the provided information. - - - Determines How well the answer addresses the user's question. - - {(isAdditionalMetricsWithSingleMode || isAdditionalMetricsEnabled) && ( - <> - - Determines How much the generated answer matches the reference answer, word-for-word - - - Determines How well the generated answer understands the meaning of the reference answer. - - - Determines Measures the recall of entities present in both reference and retrieved contexts - relative to the reference. - - - )} - -
            {showMultiModeMetrics && isMultiModes && ( ; - closeHandler?: () => void; + closeHandler?: ( + event: Event | undefined, + closeReason: { + type: 'backdropClick' | 'itemClick' | 'escapeKeyDown'; + id?: string; + } + ) => void; open: boolean; isRoot: boolean; }) { diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 0ebf4dc59..2db5a2cbd 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -268,4 +268,4 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ); }; -export default ChunkInfo; \ No newline at end of file +export default ChunkInfo; diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index f052c7a2f..30d533193 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -76,4 +76,4 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ); }; -export default CommunitiesInfo; \ No newline at end of file +export default CommunitiesInfo; diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 7a5a0510a..0a90862a4 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -148,4 +148,4 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ); }; -export default EntitiesInfo; \ No newline at end of file +export default EntitiesInfo; diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx index ae3a7818f..934e4c624 100644 --- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -13,7 +13,11 @@ const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, delete return (
            setshowChatModeOption(false)} + closeHandler={(_, reason) => { + if (reason.type === 'backdropClick') { + setshowChatModeOption(false); + } + }} open={showChatModeOption} menuAnchor={chatAnchor} isRoot={false} diff --git a/frontend/src/components/ChatBot/MetricsCheckbox.tsx b/frontend/src/components/ChatBot/MetricsCheckbox.tsx index 31c1ab7f5..593468c2b 100644 --- a/frontend/src/components/ChatBot/MetricsCheckbox.tsx +++ b/frontend/src/components/ChatBot/MetricsCheckbox.tsx @@ -9,7 +9,7 @@ function MetricsCheckbox({ }) { return ( diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index 40e0b11fc..33479eb13 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -1,4 +1,4 @@ -import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; +import { Banner, Box, DataGrid, DataGridComponents, Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; import { memo, useContext, useMemo, useRef } from 'react'; import { useReactTable, @@ -10,6 +10,8 @@ import { } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import { metricsinfo } from '../../utils/Constants'; function MetricsTab({ metricsLoading, metricDetails, @@ -40,9 +42,21 @@ function MetricsTab({ .join(' ') : capitalize(metric); return ( -
            - {capitilizedMetric} -
            + +
            + {capitilizedMetric} +
            + + + + + + + + {metricsinfo[metric]} + + +
            ); }, header: () => Metric, diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 6744af10e..0ecae78c3 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -4,13 +4,13 @@ import { createColumnHelper, getFilteredRowModel, getPaginationRowModel, - getSortedRowModel, } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; import { useContext, useEffect, useMemo, useRef } from 'react'; -import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; +import { Banner, Box, DataGrid, DataGridComponents, Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; import { multimodelmetric } from '../../types'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; export default function MultiModeMetrics({ data, @@ -52,35 +52,115 @@ export default function MultiModeMetrics({ cell: (info) => { return {info.getValue().toFixed(2)}; }, - header: () => Relevancy, + header: () => ( + + Relevancy + + + + + + + + + Determines How well the answer addresses the user's question. + + + + + ), }), columnHelper.accessor((row) => row.faithfulness as number, { id: 'Score', cell: (info) => { return {info.getValue().toFixed(2)}; }, - header: () => Faithfulness, + header: () => ( + + Faithful + + + + + + + + + Determines How accurately the answer reflects the provided information. + + + + + ), }), columnHelper.accessor((row) => row.context_entity_recall_score as number, { id: 'Recall Score', cell: (info) => { return {info.getValue()?.toFixed(2)}; }, - header: () => Context Score, + header: () => ( + + Context + + + + + + + + + Determines the recall of entities present in both reference and retrieved contexts. 
+ + + + + ), }), columnHelper.accessor((row) => row.semantic_score as number, { id: 'Semantic Score', cell: (info) => { return {info.getValue()?.toFixed(2)}; }, - header: () => Semantic Score, + header: () => ( + + Semantic + + + + + + + + + Determines How well the generated answer understands the meaning of the reference answer. + + + + + ), }), columnHelper.accessor((row) => row.rouge_score as number, { id: 'Rouge Score', cell: (info) => { return {info.getValue()?.toFixed(2)}; }, - header: () => Rouge Score, + header: () => ( + + Rouge + + + + + + + + + Determines How much the generated answer matches the reference answer, word-for-word. + + + + + ), }), ], [] @@ -93,10 +173,10 @@ export default function MultiModeMetrics({ getPaginationRowModel: getPaginationRowModel(), enableGlobalFilter: false, autoResetPageIndex: false, + enableColumnResizing: true, enableRowSelection: true, enableMultiRowSelection: true, - enableSorting: true, - getSortedRowModel: getSortedRowModel(), + enableSorting: false, }); useEffect(() => { if (isWithAdditionalMetrics === false) { diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index 1a229dc70..c7e990ae7 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -37,4 +37,4 @@ export const handleGraphNodeClick = async ( setLoadingGraphView(false); } } -}; \ No newline at end of file +}; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 9996ed0a2..98d55b40e 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -116,12 +116,13 @@ const Content: React.FC = ({ setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( + 'tableView' + ); const 
[showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); - const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`); @@ -992,4 +993,4 @@ const Content: React.FC = ({ ); }; -export default Content; \ No newline at end of file +export default Content; diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index 51144c930..0e9b2f49f 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -40,6 +40,12 @@ const S3Modal: React.FC = ({ hideModal, open }) => { processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; if (url) { setValid(validation(bucketUrl) && isFocused); diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index 8893c4f70..288bff00a 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -33,6 +33,12 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const reset = () => { diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index 38a6dd1d9..209573eaf 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -34,6 +34,12 @@ const DropZone: 
FunctionComponent = () => { processingProgress: undefined, retryOptionStatus: false, retryOption: '', + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const copiedFilesData: CustomFile[] = [...filesData]; diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 1b27f4eb7..0e78b4704 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -169,6 +169,12 @@ export default function DropZoneForSmallLayouts() { processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const copiedFilesData: CustomFile[] = [...filesData]; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index ad06a28c3..22f48c119 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -3,6 +3,7 @@ import { DataGridComponents, Flex, IconButton, + Popover, ProgressBar, StatusIndicator, TextLink, @@ -41,6 +42,7 @@ import { ClipboardDocumentIconSolid, MagnifyingGlassCircleIconSolid, DocumentTextIconSolid, + InformationCircleIconOutline, } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; @@ -53,6 +55,7 @@ import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import { showErrorToast, showNormalToast } from '../utils/toasts'; import { ThemeWrapperContext } from '../context/ThemeWrapper'; + let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { @@ -505,13 +508,67 @@ const FileTable = 
forwardRef((props, ref) => { }), columnHelper.accessor((row) => row.nodesCount, { id: 'NodesCount', - cell: (info) => {info.getValue()}, + cell: (info) => { + const hasNodeBreakDownValues = + info.row.original.chunkNodeCount > 0 || + info.row.original.communityNodeCount > 0 || + info.row.original.entityNodeCount > 0; + return ( + + {info.getValue()} + {hasNodeBreakDownValues && + (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( + + + + + + + +
              +
            • Chunk Nodes: {info.row.original.chunkNodeCount}
            • +
            • Entity Nodes: {info.row.original.entityNodeCount}
            • +
            • Community Nodes: {info.row.original.communityNodeCount}
            • +
            +
            +
            + )} +
            + ); + }, header: () => Nodes, footer: (info) => info.column.id, }), columnHelper.accessor((row) => row.relationshipsCount, { id: 'relationshipCount', - cell: (info) => {info.getValue()}, + cell: (info) => { + const hasRelationsBreakDownValues = + info.row.original.chunkRelCount > 0 || + info.row.original.communityRelCount > 0 || + info.row.original.entityEntityRelCount > 0; + return ( + + {info.getValue()} + {hasRelationsBreakDownValues && + (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( + + + + + + + +
              +
            • Chunk Relations: {info.row.original.chunkRelCount}
            • +
            • Entity Relations: {info.row.original.entityEntityRelCount}
            • +
            • Community Relations: {info.row.original.communityRelCount}
            • +
            +
            +
            + )} +
            + ); + }, header: () => Relations, footer: (info) => info.column.id, }), @@ -719,6 +776,12 @@ const FileTable = forwardRef((props, ref) => { accessToken: item?.accessToken ?? '', retryOption: item.retry_condition ?? '', retryOptionStatus: false, + chunkNodeCount: item.chunkNodeCount ?? 0, + chunkRelCount: item.chunkRelCount ?? 0, + entityNodeCount: item.entityNodeCount ?? 0, + entityEntityRelCount: item.entityEntityRelCount ?? 0, + communityNodeCount: item.communityNodeCount ?? 0, + communityRelCount: item.communityRelCount ?? 0, }); } }); @@ -846,6 +909,12 @@ const FileTable = forwardRef((props, ref) => { status, processed_chunk = 0, total_chunks, + chunkNodeCount, + entityNodeCount, + communityNodeCount, + chunkRelCount, + entityEntityRelCount, + communityRelCount, } = file_name; if (fileName && total_chunks) { setFilesData((prevfiles) => @@ -855,10 +924,16 @@ const FileTable = forwardRef((props, ref) => { ...curfile, status: status, nodesCount: nodeCount, - relationshipCount: relationshipCount, + relationshipsCount: relationshipCount, model: model, processingTotalTime: processingTime?.toFixed(2), processingProgress: Math.floor((processed_chunk / total_chunks) * 100), + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 
0, }; } return curfile; @@ -876,7 +951,20 @@ const FileTable = forwardRef((props, ref) => { const updateProgress = (i: statusupdate) => { const { file_name } = i; - const { fileName, nodeCount = 0, relationshipCount = 0, status, processed_chunk = 0, total_chunks } = file_name; + const { + fileName, + nodeCount = 0, + relationshipCount = 0, + status, + processed_chunk = 0, + total_chunks, + chunkNodeCount, + entityNodeCount, + communityNodeCount, + chunkRelCount, + entityEntityRelCount, + communityRelCount, + } = file_name; if (fileName && total_chunks) { setFilesData((prevfiles) => prevfiles.map((curfile) => { @@ -885,8 +973,14 @@ const FileTable = forwardRef((props, ref) => { ...curfile, status: status, nodesCount: nodeCount, - relationshipCount: relationshipCount, + relationshipsCount: relationshipCount, processingProgress: Math.floor((processed_chunk / total_chunks) * 100), + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 
0, }; } return curfile; @@ -984,4 +1078,4 @@ const FileTable = forwardRef((props, ref) => { ); }); -export default FileTable; \ No newline at end of file +export default FileTable; diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 7748ebca2..4fe1f52f5 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -447,4 +447,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; \ No newline at end of file +export default GraphViewModal; diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 97bcf1902..766758cc5 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -198,7 +198,11 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti
            setshowChatModeOption(false)} + closeHandler={(_, reason) => { + if (reason.type === 'backdropClick') { + setshowChatModeOption(false); + } + }} open={showChatModeOption} menuAnchor={chatAnchor} isRoot={false} diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 3a2e8d6b7..731469e6b 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -224,8 +224,10 @@ const SideNav: React.FC = ({ { - setshowChatMode(false); + closeHandler={(_, reason) => { + if (reason.type === 'backdropClick') { + setshowChatMode(false); + } }} menuAnchor={anchorMenuRef} isRoot={false} diff --git a/frontend/src/components/Popups/ChunkPopUp/index.tsx b/frontend/src/components/Popups/ChunkPopUp/index.tsx index 7aeb5b961..b8020c276 100644 --- a/frontend/src/components/Popups/ChunkPopUp/index.tsx +++ b/frontend/src/components/Popups/ChunkPopUp/index.tsx @@ -1,4 +1,3 @@ - import { Dialog, Typography, Flex, IconButton, useMediaQuery } from '@neo4j-ndl/react'; import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons'; import { chunkdata } from '../../../types'; @@ -7,7 +6,6 @@ import { useMemo } from 'react'; import chunklogo from '../../../assets/images/chunks.svg'; import { tokens } from '@neo4j-ndl/base'; - const ChunkPopUp = ({ showChunkPopup, chunks, @@ -27,7 +25,6 @@ const ChunkPopUp = ({ currentPage: number | null; totalPageCount: number | null; }) => { - const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const sortedChunksData = useMemo(() => { diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 6096cd06b..934a38f76 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ 
-474,4 +474,4 @@ export default function ConnectionModal({
            ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 321179e9c..97c584b7c 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -367,4 +367,4 @@ export default function DeduplicationTab() { )} ); -} \ No newline at end of file +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index c0e09ddac..5f0049abd 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -329,4 +329,4 @@ export default function DeletePopUpForOrphanNodes({ )} ); -} \ No newline at end of file +} diff --git a/frontend/src/components/UI/CustomMenu.tsx b/frontend/src/components/UI/CustomMenu.tsx index e79c1bf00..fa33c368d 100644 --- a/frontend/src/components/UI/CustomMenu.tsx +++ b/frontend/src/components/UI/CustomMenu.tsx @@ -8,7 +8,13 @@ export default function CustomMenu({ isRoot = false, }: { open: boolean; - closeHandler: () => void; + closeHandler: ( + event: Event | undefined, + closeReason: { + type: 'backdropClick' | 'itemClick' | 'escapeKeyDown'; + id?: string; + } + ) => void; items: Menuitems[] | undefined; anchorOrigin: React.RefObject; isRoot?: boolean; diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx index 7965c03b9..20fa0aba0 100644 --- a/frontend/src/hooks/useSourceInput.tsx +++ b/frontend/src/hooks/useSourceInput.tsx @@ -57,6 +57,12 @@ export default function useSourceInput( processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, 
+ chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; if (url.trim() != '') { setIsValid(validator(url) && isFocused); diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx index 5612dc747..3ab1827f5 100644 --- a/frontend/src/hooks/useSse.tsx +++ b/frontend/src/hooks/useSse.tsx @@ -20,6 +20,12 @@ export default function useServerSideEvent( model, processed_chunk = 0, fileSize, + chunkNodeCount, + entityNodeCount, + communityNodeCount, + chunkRelCount, + entityEntityRelCount, + communityRelCount, } = eventSourceRes; const alertShownStatus = JSON.parse(localStorage.getItem('alertShown') || 'null'); @@ -38,6 +44,12 @@ export default function useServerSideEvent( return { ...curfile, status: total_chunks === processed_chunk ? 'Completed' : status, + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 0, nodesCount: nodeCount, relationshipsCount: relationshipCount, model: model, @@ -61,6 +73,12 @@ export default function useServerSideEvent( relationshipsCount: relationshipCount, model: model, processingTotalTime: processingTime?.toFixed(2), + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 
0, }; } return curfile; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 036449a66..b2fb3e331 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -28,6 +28,12 @@ export interface CustomFileBase extends Partial { isChecked?: boolean; retryOptionStatus: boolean; retryOption: string; + chunkNodeCount: number; + chunkRelCount: number; + entityNodeCount: number; + entityEntityRelCount: number; + communityNodeCount: number; + communityRelCount: number; } export interface CustomFile extends CustomFileBase { id: string; @@ -312,6 +318,12 @@ export interface fileStatus { total_chunks?: number; // total_pages?: number; processed_chunk?: number; + chunkNodeCount: number; + chunkRelCount: number; + entityNodeCount: number; + entityEntityRelCount: number; + communityNodeCount: number; + communityRelCount: number; } export interface PollingAPI_Response extends Partial { data: statusupdate; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 642e4b4f0..8e9167cb0 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -13,26 +13,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai_gpt_3.5', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai_gpt_4o') ? 'openai_gpt_4o' : llms?.includes('gemini_1.5_pro') - ? 'gemini_1.5_pro' - : 'diffbot'; + ? 
'gemini_1.5_pro' + : 'diffbot'; export const supportedLLmsForRagas = [ 'openai_gpt_3.5', 'openai_gpt_4', @@ -77,40 +77,40 @@ export const chatModeReadableLables: Record = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables['graph+vector'], - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables['graph+vector+fulltext'], - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables['entity search+vector'], - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables['global search+vector+fulltext'], - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables['graph+vector'], + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: 
chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables['graph+vector+fulltext'], + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: chatModeLables['entity search+vector'], + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables['global search+vector+fulltext'], + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -352,3 +352,10 @@ export function getStoredSchema() { } return []; } +export const metricsinfo: Record = { + faithfulness: 'Determines How accurately the answer reflects the provided information', + answer_relevancy: "Determines How well the answer addresses the user's question.", + rouge_score: 'Determines How much the generated answer matches the reference answer, word-for-word.', + semantic_score: 'Determines How well the generated answer understands the meaning of the reference answer.', + context_entity_recall_score: 'Determines the recall of entities present in both reference and retrieved contexts', +}; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 0859c13e9..253a4e287 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -760,15 +760,15 @@ "@types/chroma-js" "2.1.4" chroma-js "2.4.2" -"@neo4j-ndl/base@^3.0.10": - version "3.0.10" - resolved "https://registry.yarnpkg.com/@neo4j-ndl/base/-/base-3.0.10.tgz#909714ecab10bac08e2066841a83babc2931020b" - integrity sha512-ENZebO1o1oq9xTEJEVkKtlrF5QhfxZqM1lmxU6jbrdwa1TA+YeUVnyBBS3HoGcRGMFCJ3ayJQxnpj7J2ZCTSgA== - -"@neo4j-ndl/react@^3.0.17": - version "3.0.17" 
- resolved "https://registry.yarnpkg.com/@neo4j-ndl/react/-/react-3.0.17.tgz#bf323358fa470ba7f50e334a5381f2006f8ad1ef" - integrity sha512-qj392qAXNWpxjh62m/5EIZBVALaxSsd1ACbVsJaQEWy4QmfUz3KuAs3rsdwSueN2TYbyEy2C0myoLztlU+3D/A== +"@neo4j-ndl/base@^3.0.14": + version "3.0.14" + resolved "https://registry.yarnpkg.com/@neo4j-ndl/base/-/base-3.0.14.tgz#158db809cec3c986cf9170f8967f37cbf5740f61" + integrity sha512-GVRuibSXvhKsJMJH4J7ROQ1yyvNhvyiPBGewFJHR3fspxi/nKV4dh0jCspdSEdrtAhdhI5VGdwQlSDJREjEPzA== + +"@neo4j-ndl/react@^3.0.23": + version "3.0.23" + resolved "https://registry.yarnpkg.com/@neo4j-ndl/react/-/react-3.0.23.tgz#07b1c24f5ca5adee630318b8a0a67419b5736a53" + integrity sha512-/szMXh/2ROpAVrNrNTOP4IK5eMWB5hRNMHXfu7NJOyqYAhWJlMkjZQClGEM7qvBAX6RoV8mmRWafquWMZhdnoA== dependencies: "@dnd-kit/core" "6.1.0" "@dnd-kit/sortable" "8.0.0" From 95eaf19695b1a05b8db663bbdbc6e7cf6ccef9cf Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 20 Nov 2024 09:24:19 +0000 Subject: [PATCH 242/292] youtube url fix --- frontend/src/components/UI/HoverableLink.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/UI/HoverableLink.tsx b/frontend/src/components/UI/HoverableLink.tsx index a212b0c88..4c320a14a 100644 --- a/frontend/src/components/UI/HoverableLink.tsx +++ b/frontend/src/components/UI/HoverableLink.tsx @@ -28,7 +28,8 @@ const HoverableLink: React.FC = ({ url, children }) => { setHovering(false); }; const isYouTubeURL = (url: string): boolean => { - return url.includes('youtube.com') || url.includes('youtu.be'); + const newurl = new URL(url); + return newurl.host === 'youtube.com' || newurl.host === 'youtu.be'; }; const extractYouTubeVideoId = (url: string): string => { const videoIdRegex = /(?:\/embed\/|\/watch\?v=|\/(?:embed\/|v\/|watch\?.*v=|youtu\.be\/|embed\/|v=))([^&?#]+)/; From 39e2807fbc28706fa7655e64364c09e7d807beda Mon Sep 17 00:00:00 2001 From: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:05:34 +0000 Subject: [PATCH 243/292] Commented CSP middleware and added endpoint backend_connection_configuation --- backend/score.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/backend/score.py b/backend/score.py index 30f634292..666e2d60f 100644 --- a/backend/score.py +++ b/backend/score.py @@ -81,7 +81,7 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): await gzip_middleware(scope, receive, send) app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) @@ -918,5 +918,25 @@ async def fetch_chunktext( finally: gc.collect() +@app.post("/backend_connection_configuation") +async def backend_connection_configuation(): + try: + graph = Neo4jGraph() + print(f'login connection status of object: {graph}') + if graph is not None: + graph_connection = True + return create_api_response('Success',message=f"Backend connection successful",data=graph_connection) + else: + graph_connection = False + return create_api_response('Success',message=f"Backend connection is not successful",data=graph_connection) + except Exception as e: + job_status = 
"Failed" + message="Unable to connect backend DB" + error_message = str(e) + logging.exception(f'{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + if __name__ == "__main__": uvicorn.run(app) \ No newline at end of file From c7b4dc04c98f29e6cc94b686af1c2bfceab4dce6 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:47:48 +0000 Subject: [PATCH 244/292] added csp header --- frontend/nginx/nginx.conf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/frontend/nginx/nginx.conf b/frontend/nginx/nginx.conf index bf047145d..3c940690b 100644 --- a/frontend/nginx/nginx.conf +++ b/frontend/nginx/nginx.conf @@ -2,6 +2,14 @@ server { listen 8080; add_header X-Frame-Options "DENY"; add_header X-Content-Type-Options "nosniff"; + set $backendapi "BACKEND_API_URL"; + set $segmentapi "SEGMENT_API_URL"; + set $webhost "*.FRONTEND_HOSTNAME"; + add_header Content-Security-Policy "connect-src 'self' $backendapi $segmentapi; + frame-src 'self' *.youtube.com *.wikipedia.org; + script-src 'self' 'unsafe-inline' https://accounts.google.com/gsi/client; + default-src 'self' $webhost data:; + style-src 'self' *.googleapis.com 'unsafe-inline';" always ; location / { root /usr/share/nginx/html; index index.html index.htm; From f244b9fc42542171c5973647da24e97c9f91d162 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:04:58 +0000 Subject: [PATCH 245/292] removed the useEffect --- frontend/src/components/Content.tsx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 98d55b40e..0d352e159 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -167,13 +167,6 @@ const Content: React.FC = ({ setOpenConnection((prev) => ({ ...prev, openPopUp: true 
})); } }, []); - useEffect(() => { - if (currentPage >= 1) { - (async () => { - await getChunks(documentName, currentPage); - })(); - } - }, [currentPage, documentName]); useEffect(() => { if (afterFirstRender) { localStorage.setItem('processedCount', JSON.stringify({ db: userCredentials?.uri, count: processedCount })); From 9d17ec13b0f9573d87e9bb1a4761a012879eaf41 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:43:14 +0530 Subject: [PATCH 246/292] Table issue (#888) * table changes * removed interdeterminant checkbox * removed material ui checkbox * labels changes * node labels * messages fix * aria-label added --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> --- frontend/src/App.css | 2 +- .../components/ChatBot/ChatOnlyComponent.tsx | 100 ++++++++++-------- frontend/src/components/ChatBot/Chatbot.tsx | 4 +- .../Local/DropZoneForSmallLayouts.tsx | 2 +- .../components/Graph/GraphPropertiesPanel.tsx | 71 +++++++------ frontend/src/components/Layout/Header.tsx | 40 +++++-- frontend/src/types.ts | 4 + frontend/src/utils/Constants.ts | 1 + 8 files changed, 140 insertions(+), 84 deletions(-) diff --git a/frontend/src/App.css b/frontend/src/App.css index da6c34af6..70127b6af 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -58,7 +58,7 @@ } .contentWithChatBot { - width: calc(-550px + 100dvw); + width: calc(-528px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; diff --git a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx index 25e618c16..ebeb3d7d5 100644 --- a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx +++ b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx @@ -1,29 +1,28 @@ +import { useEffect, useState, useCallback, useReducer } from 'react'; +import { useLocation } from 'react-router'; import { MessageContextWrapper, 
useMessageContext } from '../../context/UserMessages'; import UserCredentialsWrapper, { useCredentials } from '../../context/UserCredentials'; -import Chatbot from './Chatbot'; -import { getIsLoading } from '../../utils/Utils'; import { FileContextProvider } from '../../context/UsersFiles'; -import { useEffect, useState, useCallback } from 'react'; +import Chatbot from './Chatbot'; import ConnectionModal from '../Popups/ConnectionModal/ConnectionModal'; -import { clearChatAPI } from '../../services/QnaAPI'; -import { connectionState, Messages, UserCredentials } from '../../types'; -import { useLocation } from 'react-router'; import Header from '../Layout/Header'; +import { clearChatAPI } from '../../services/QnaAPI'; +import { ChatProps, connectionState, Messages, UserCredentials } from '../../types'; +import { getIsLoading } from '../../utils/Utils'; -interface chatProp { - chatMessages: Messages[]; -} -const ChatContent: React.FC = ({ chatMessages }) => { - const date = new Date(); +const ChatContent: React.FC = ({ chatMessages }) => { const { clearHistoryData, messages, setMessages, setClearHistoryData } = useMessageContext(); const { setUserCredentials, setConnectionStatus, connectionStatus } = useCredentials(); + const [showBackButton, setShowBackButton]= useReducer((state) => !state, false); const [openConnection, setOpenConnection] = useState({ openPopUp: false, chunksExists: false, vectorIndexMisMatch: false, chunksExistsWithDifferentDimension: false, }); - + /** + * Initializes connection settings based on URL parameters. 
+ */ const initialiseConnection = useCallback(() => { const urlParams = new URLSearchParams(window.location.search); const uri = urlParams.get('uri'); @@ -31,22 +30,32 @@ const ChatContent: React.FC = ({ chatMessages }) => { const encodedPassword = urlParams.get('password'); const database = urlParams.get('database'); const port = urlParams.get('port'); - const openModel = urlParams.get('open') === 'true'; - if (openModel || !(uri && user && encodedPassword && database && port)) { + const openModal = urlParams.get('open') === 'true'; + if (openModal || !(uri && user && encodedPassword && database && port)) { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } else { - const credentialsForAPI = { uri, userName: user, password: atob(atob(encodedPassword)), database, port }; - setUserCredentials({ ...credentialsForAPI }); + const credentialsForAPI: UserCredentials = { + uri, + userName: user, + password: atob(atob(encodedPassword)), + database, + port, + }; + setShowBackButton(); + setUserCredentials(credentialsForAPI); setConnectionStatus(true); setMessages(chatMessages); + // Remove query params from URL window.history.replaceState({}, document.title, window.location.pathname); } - }, [connectionStatus, setUserCredentials]); + }, [chatMessages, setUserCredentials, setConnectionStatus, setMessages]); useEffect(() => { initialiseConnection(); - }, [connectionStatus]); - + }, [initialiseConnection]); + /** + * Handles successful connection establishment. + */ const handleConnectionSuccess = () => { setConnectionStatus(true); setOpenConnection((prev) => ({ ...prev, openPopUp: false })); @@ -54,26 +63,29 @@ const ChatContent: React.FC = ({ chatMessages }) => { urlParams.delete('openModal'); window.history.replaceState({}, document.title, `${window.location.pathname}?${urlParams.toString()}`); }; + /** + * Clears chat history by calling the API. 
+ */ const deleteOnClick = async () => { try { setClearHistoryData(true); - const response = await clearChatAPI( - JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials, - sessionStorage.getItem('session_id') ?? '' - ); + const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; + const sessionId = sessionStorage.getItem('session_id') || ''; + const response = await clearChatAPI(credentials, sessionId); if (response.data.status !== 'Success') { setClearHistoryData(false); } } catch (error) { - console.log(error); + console.error('Error clearing chat history:', error); setClearHistoryData(false); } }; useEffect(() => { if (clearHistoryData) { + const currentDateTime = new Date(); setMessages([ { - datetime: `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, + datetime: `${currentDateTime.toLocaleDateString()} ${currentDateTime.toLocaleTimeString()}`, id: 2, modes: { 'graph+vector+fulltext': { @@ -87,7 +99,7 @@ const ChatContent: React.FC = ({ chatMessages }) => { ]); setClearHistoryData(false); } - }, [clearHistoryData]); + }, [clearHistoryData, setMessages]); return ( <> = ({ chatMessages }) => { onSuccess={handleConnectionSuccess} isChatOnly={true} /> - { +
            +
            -
            -
            - -
            +
            - } +
            ); }; +/** +* ChatOnlyComponent +* Wrapper component to provide necessary context and initialize chat functionality. +*/ const ChatOnlyComponent: React.FC = () => { const location = useLocation(); + const chatMessages = location.state?.messages as Messages[] || []; return ( - + ); }; - -export default ChatOnlyComponent; +export default ChatOnlyComponent; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 4d1a1b971..03d027d04 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -410,8 +410,8 @@ const Chatbot: FC = (props) => { return (
            diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index 0e78b4704..f3fbff852 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -222,7 +222,7 @@ export default function DropZoneForSmallLayouts() { return ( <>
            - + {isLoading ? : }
            diff --git a/frontend/src/components/Graph/GraphPropertiesPanel.tsx b/frontend/src/components/Graph/GraphPropertiesPanel.tsx index 64332c9ac..006dfc095 100644 --- a/frontend/src/components/Graph/GraphPropertiesPanel.tsx +++ b/frontend/src/components/Graph/GraphPropertiesPanel.tsx @@ -12,41 +12,50 @@ const isNode = (item: BasicNode | BasicRelationship): item is BasicNode => { const GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanelProps) => { const inspectedItemType = isNode(inspectedItem) ? 'node' : 'relationship'; + const filteredProperties = + inspectedItemType === 'node' + ? Object.entries((inspectedItem as BasicNode).properties) + .filter(([, value]) => value !== null && value !== undefined && value !== ' ') + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record) + : {}; const properties = inspectedItemType === 'node' ? [ - { - key: '', - value: `${(inspectedItem as BasicNode).id}`, - type: 'String', - }, - ...Object.keys((inspectedItem as BasicNode).properties).map((key) => { - const value = (inspectedItem as BasicNode).properties[key]; - return { key: key, value: value ?? '' }; - }), - ] + { + key: '', + value: `${(inspectedItem as BasicNode).id}`, + type: 'String', + }, + ...Object.keys(filteredProperties).map((key) => { + const value = filteredProperties[key]; + return { key, value }; + }), + ] : [ - { - key: '', - value: `${(inspectedItem as BasicRelationship).id}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).from}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).to}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).caption ?? 
''}`, - type: 'String', - }, - ]; + { + key: '', + value: `${(inspectedItem as BasicRelationship).id}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).from}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).to}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).caption ?? ''}`, + type: 'String', + }, + ]; const labelsSorted = useMemo(() => { if (isNode(inspectedItem)) { return [...inspectedItem.labels].sort(sortAlphabetically); diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 766758cc5..36e4022f3 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -8,8 +8,9 @@ import { ArrowTopRightOnSquareIconOutline, TrashIconOutline, ArrowLeftIconOutline, + ArrowDownTrayIconOutline, } from '@neo4j-ndl/react/icons'; -import { Button, Typography } from '@neo4j-ndl/react'; +import { Button, TextLink, Typography } from '@neo4j-ndl/react'; import { Dispatch, memo, SetStateAction, useCallback, useContext, useEffect, useRef, useState } from 'react'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; import { buttonCaptions, tooltips } from '../../utils/Constants'; @@ -21,21 +22,23 @@ import { useMessageContext } from '../../context/UserMessages'; import { RiChatSettingsLine } from 'react-icons/ri'; import ChatModeToggle from '../ChatBot/ChatModeToggle'; import { connectionState } from '../../types'; +import { downloadClickHandler, getIsLoading } from '../../utils/Utils'; interface HeaderProp { chatOnly?: boolean; deleteOnClick?: () => void; setOpenConnection?: Dispatch>; + showBackButton?: boolean; } -const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnection }) => { +const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnection, showBackButton }) => { const { colorMode, toggleColorMode } = useContext(ThemeWrapperContext); const 
navigate = useNavigate(); const { messages } = useMessageContext(); const handleURLClick = useCallback((url: string) => { window.open(url, '_blank'); }, []); - + const downloadLinkRef = useRef(null); const { isSchema, setIsSchema } = useFileContext(); const { connectionStatus } = useCredentials(); const chatAnchor = useRef(null); @@ -46,6 +49,7 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const openChatPopout = useCallback(() => { let session = localStorage.getItem('neo4j.connection'); + const isLoading = getIsLoading(messages); if (session) { const neo4jConnection = JSON.parse(session); const { uri } = neo4jConnection; @@ -57,7 +61,7 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const chatUrl = `/chat-only?uri=${encodeURIComponent( uri )}&user=${userName}&password=${encodedPassword}&database=${database}&port=${port}&connectionStatus=${connectionStatus}`; - navigate(chatUrl, { state: messages }); + navigate(chatUrl, { state: { messages, isLoading } });; } else { const chatUrl = `/chat-only?openModal=true`; window.open(chatUrl, '_blank'); @@ -140,6 +144,7 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti text={tooltips.openChatPopout} size='large' clean + disabled={getIsLoading(messages)} > @@ -165,9 +170,10 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti {buttonCaptions.connectToNeo4j} )} - + {showBackButton && ( + )}
            { @@ -181,12 +187,34 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti
            + <> + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + )} + disabled={messages.length === 1 || getIsLoading(messages)} + placement={chatOnly ? 'left' : 'bottom'} + label={tooltips.downloadChat} + > + + + + <> + + "" + + diff --git a/frontend/src/types.ts b/frontend/src/types.ts index b2fb3e331..fcb170604 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -902,3 +902,7 @@ export interface GraphViewHandlerProps { entityInfo?: Entity[]; mode?: string; } + +export interface ChatProps { + chatMessages: Messages[]; +} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8e9167cb0..ac8c1a502 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -144,6 +144,7 @@ export const tooltips = { clearGraphSettings: 'Clear configured Graph Schema', applySettings: 'Apply Graph Schema', openChatPopout: 'Chat', + downloadChat:'Download Conversation' }; export const buttonCaptions = { From 86c33ba864e3a0f3513105a8aaf77d450face438 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:20:22 +0000 Subject: [PATCH 247/292] key fix --- frontend/src/components/Content.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 0d352e159..97e2dcde6 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -596,7 +596,7 @@ const Content: React.FC = ({ status: 'Reprocess', processingProgress: isStartFromBegining ? 0 : f.processingProgress, nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount, + relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, } : f; }); From 09df9e67229dc59961562a0ed4f9f537ba1a69f6 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:28:59 +0530 Subject: [PATCH 248/292] Update README.md --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index fcc3026cc..5d8e90e2f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,21 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. +Set the values for nginx configuration in frontend/nginx/nginx.conf + +``` +set $backendapi "BACKEND_API_URL"; +set $segmentapi "SEGMENT_API_URL"; +set $webhost "*.FRONTEND_HOSTNAME"; +``` +Here the SEGMENT_API_URL is optional it will not impact the application it is used for analytics purpose. +Both BACKEND_API_URL and FRONTEND_HOSTNAME is compulsory to run the application. +BACKEND_API_URL is the url where backend server is up and running. +FRONTEND_HOSTNAME is the hostname of the frontend application url. +#example: +if the frontend application is running on http://localhost:8080 +the host name will be localhost:8080 + According to the environment, we are configuring the models which indicated by VITE_LLM_MODELS_PROD variable we can configure models based on our needs. 
EX: ```env From 56102474e33178628b8cea5c90153b15ba590028 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:30:26 +0530 Subject: [PATCH 249/292] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5d8e90e2f..b745e0c51 100644 --- a/README.md +++ b/README.md @@ -32,16 +32,17 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. -Set the values for nginx configuration in frontend/nginx/nginx.conf +Set the values for nginx configuration in frontend/nginx/nginx.conf. ``` set $backendapi "BACKEND_API_URL"; set $segmentapi "SEGMENT_API_URL"; set $webhost "*.FRONTEND_HOSTNAME"; ``` -Here the SEGMENT_API_URL is optional it will not impact the application it is used for analytics purpose. -Both BACKEND_API_URL and FRONTEND_HOSTNAME is compulsory to run the application. +Here the SEGMENT_API_URL is optional it will not impact the application it is used for analytics purpose.Both BACKEND_API_URL and FRONTEND_HOSTNAME is compulsory to run the application. + BACKEND_API_URL is the url where backend server is up and running. + FRONTEND_HOSTNAME is the hostname of the frontend application url. 
#example: if the frontend application is running on http://localhost:8080 From a247d759cc57a194a68b716e322b73f08851973f Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:31:30 +0530 Subject: [PATCH 250/292] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index b745e0c51..7ad06358d 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,9 @@ Here the SEGMENT_API_URL is optional it will not impact the application it is us BACKEND_API_URL is the url where backend server is up and running. FRONTEND_HOSTNAME is the hostname of the frontend application url. + #example: + if the frontend application is running on http://localhost:8080 the host name will be localhost:8080 From 69d46ab38674fb521924748190582f958fcdf00f Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:26:27 +0530 Subject: [PATCH 251/292] removed extra document nodes and combine chunk logic (#894) --- backend/src/llm.py | 13 ++++++++++++- backend/src/shared/common_fn.py | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/backend/src/llm.py b/backend/src/llm.py index 93ee0f08f..bfb2a7abc 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -144,6 +144,16 @@ def get_combined_chunks(chunkId_chunkDoc_list): ) return combined_chunk_document_list +def get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list): + combined_chunk_document_list = [ + Document( + page_content=document["chunk_doc"].page_content, + metadata={"chunk_id": [document["chunk_id"]]}, + ) + for document in chunkId_chunkDoc_list + ] + return combined_chunk_document_list + async def get_graph_document_list( llm, combined_chunk_document_list, allowedNodes, allowedRelationship @@ -191,7 +201,8 @@ async def get_graph_document_list( async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship): llm, 
model_name = get_llm(model) - combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + #combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + combined_chunk_document_list = get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list) if allowedNodes is None or allowedNodes=="": allowedNodes =[] diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 6d24912c7..e6de7a583 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -60,7 +60,7 @@ def get_chunk_and_graphDocument(graph_document_list, chunkId_chunkDoc_list): logging.info("creating list of chunks and graph documents in get_chunk_and_graphDocument func") lst_chunk_chunkId_document=[] for graph_document in graph_document_list: - for chunk_id in graph_document.source.metadata['combined_chunk_ids'] : + for chunk_id in graph_document.source.metadata['chunk_id'] : lst_chunk_chunkId_document.append({'graph_doc':graph_document,'chunk_id':chunk_id}) return lst_chunk_chunkId_document @@ -94,7 +94,7 @@ def load_embedding_model(embedding_model_name: str): return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + graph.add_graph_documents(graph_document_list, baseEntityLabel=True) # graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): From 4721318f2c1856b76b21312aa335d5c03b75d53e Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 22 Nov 2024 13:34:29 +0530 Subject: [PATCH 252/292] Update README.md --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7ad06358d..557eed6dd 100644 --- a/README.md +++ b/README.md @@ -210,13 +210,14 @@ 
VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} ## Usage -1. Connect to Neo4j Aura Instance by passing URI and password or using Neo4j credentials file. -2. Choose your source from a list of Unstructured sources to create graph. -3. Change the LLM (if required) from drop down, which will be used to generate graph. -4. Optionally, define schema(nodes and relationship labels) in entity graph extraction settings. -5. Either select multiple files to 'Generate Graph' or all the files in 'New' status will be processed for graph creation. -6. Have a look at the graph for individual files using 'View' in grid or select one or more files and 'Preview Graph' -7. Ask questions related to the processed/completed sources to chat-bot, Also get detailed information about your answers generated by LLM. +1. Connect to Neo4j Aura Instance which can be both AURA DS or AURA DB by passing URI and password or using Neo4j credentials file. +2. To differntiate we have added different icons. For AURA DB we have a database icon and for AURA DS we have scientific molecule icon right under Neo4j Connection details label. +3. Choose your source from a list of Unstructured sources to create graph. +4. Change the LLM (if required) from drop down, which will be used to generate graph. +5. Optionally, define schema(nodes and relationship labels) in entity graph extraction settings. +6. Either select multiple files to 'Generate Graph' or all the files in 'New' status will be processed for graph creation. +7. Have a look at the graph for individual files using 'View' in grid or select one or more files and 'Preview Graph' +8. Ask questions related to the processed/completed sources to chat-bot, Also get detailed information about your answers generated by LLM. 
## Links From ab0405fc00b443510164d6e888af98083b6ba2a5 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 13:36:25 +0530 Subject: [PATCH 253/292] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 557eed6dd..498190c5d 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ BACKEND_API_URL is the url where backend server is up and running. FRONTEND_HOSTNAME is the hostname of the frontend application url. -#example: +EX: if the frontend application is running on http://localhost:8080 the host name will be localhost:8080 From 9628b6d8501f9f03fa2a987f2481baa8d5677947 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:00:39 +0000 Subject: [PATCH 254/292] conditional deployment based on the enviornment --- docker-compose.yml | 5 +++-- frontend/Dockerfile | 4 +++- frontend/example.env | 2 ++ frontend/nginx/nginx.local.conf | 16 ++++++++++++++++ frontend/nginx/{nginx.conf => nginx.prod.conf} | 7 ++----- 5 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 frontend/nginx/nginx.local.conf rename frontend/nginx/{nginx.conf => nginx.prod.conf} (68%) diff --git a/docker-compose.yml b/docker-compose.yml index 8a0fdc4b2..ea074f50b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -63,11 +63,12 @@ services: - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} + - DEPLOYMENT_ENV=local volumes: - ./frontend:/app - /app/node_modules - # env_file: - # - ./frontend/.env + env_file: + - ./frontend/.env container_name: frontend ports: - "8080:8080" diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 311294f4a..d0a258a59 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -33,8 +33,10 @@ RUN 
VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ # Step 2: Serve the application using Nginx FROM nginx:alpine +ARG DEPLOYMENT_ENV="local" +ENV DEPLOYMENT_ENV=$DEPLOYMENT_ENV COPY --from=build /app/dist /usr/share/nginx/html -COPY nginx/nginx.conf /etc/nginx/conf.d/default.conf +COPY /nginx/nginx.${DEPLOYMENT_ENV}.conf /etc/nginx/templates/nginx.conf.template EXPOSE 8080 CMD ["nginx", "-g", "daemon off;"] diff --git a/frontend/example.env b/frontend/example.env index 4063fbc37..901ac6017 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -10,3 +10,5 @@ VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" VITE_BATCH_SIZE=2 VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" +VITE_FRONTEND_HOSTNAME="localhost:8080" +VITE_SEGMENT_API_URL="" diff --git a/frontend/nginx/nginx.local.conf b/frontend/nginx/nginx.local.conf new file mode 100644 index 000000000..2bab3515f --- /dev/null +++ b/frontend/nginx/nginx.local.conf @@ -0,0 +1,16 @@ +server { + + listen 8080; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + error_page 401 403 404 index.html; + + location /public { + root /usr/local/var/www; + } +} \ No newline at end of file diff --git a/frontend/nginx/nginx.conf b/frontend/nginx/nginx.prod.conf similarity index 68% rename from frontend/nginx/nginx.conf rename to frontend/nginx/nginx.prod.conf index 3c940690b..ac6c1db82 100644 --- a/frontend/nginx/nginx.conf +++ b/frontend/nginx/nginx.prod.conf @@ -2,13 +2,10 @@ server { listen 8080; add_header X-Frame-Options "DENY"; add_header X-Content-Type-Options "nosniff"; - set $backendapi "BACKEND_API_URL"; - set $segmentapi "SEGMENT_API_URL"; - set $webhost "*.FRONTEND_HOSTNAME"; - add_header Content-Security-Policy "connect-src 'self' $backendapi $segmentapi; + add_header Content-Security-Policy "connect-src 'self' ${VITE_BACKEND_API_URL} ${VITE_SEGMENT_API_URL}; frame-src 'self' *.youtube.com *.wikipedia.org; script-src 
'self' 'unsafe-inline' https://accounts.google.com/gsi/client; - default-src 'self' $webhost data:; + default-src 'self' *.${VITE_FRONTEND_HOSTNAME} data:; style-src 'self' *.googleapis.com 'unsafe-inline';" always ; location / { root /usr/share/nginx/html; From 03526a4f0c376c82836fe7156b619d8f30fdc160 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:48:50 +0530 Subject: [PATCH 255/292] Update README.md --- README.md | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/README.md b/README.md index 498190c5d..cbf625993 100644 --- a/README.md +++ b/README.md @@ -31,25 +31,6 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b ### Local deployment #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. - -Set the values for nginx configuration in frontend/nginx/nginx.conf. - -``` -set $backendapi "BACKEND_API_URL"; -set $segmentapi "SEGMENT_API_URL"; -set $webhost "*.FRONTEND_HOSTNAME"; -``` -Here the SEGMENT_API_URL is optional it will not impact the application it is used for analytics purpose.Both BACKEND_API_URL and FRONTEND_HOSTNAME is compulsory to run the application. - -BACKEND_API_URL is the url where backend server is up and running. - -FRONTEND_HOSTNAME is the hostname of the frontend application url. - -EX: - -if the frontend application is running on http://localhost:8080 -the host name will be localhost:8080 - According to the environment, we are configuring the models which indicated by VITE_LLM_MODELS_PROD variable we can configure models based on our needs. 
EX: ```env From 856288053037d331060d38c70e1049dc2269efcb Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:54:29 +0530 Subject: [PATCH 256/292] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cbf625993..164e51f00 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,7 @@ Allow unauthenticated request : Yes | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | | VITE_LLM_MODELS_PROD | Optional | openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash | To Distinguish models based on the Enviornment PROD or DEV +| VITE_LLM_MODELS | Optional | 'diffbot,openai_gpt_3.5,openai_gpt_4o,openai_gpt_4o_mini,gemini_1.5_pro,gemini_1.5_flash,azure_ai_gpt_35,azure_ai_gpt_4o,ollama_llama3,groq_llama3_70b,anthropic_claude_3_5_sonnet' | Supported Models For the application | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | From c5603e6b7ff8e5a2cb9372e68a40e77f62c91ed7 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:54:54 +0000 Subject: [PATCH 257/292] removed the reference answer checkbox and textarea while additional metrics are loading --- .../src/components/ChatBot/ChatInfoModal.tsx | 10 ++- .../components/ChatBot/ChatOnlyComponent.tsx | 19 +++-- .../components/ChatBot/MultiModeMetrics.tsx | 6 +- .../components/Graph/GraphPropertiesPanel.tsx | 72 +++++++++---------- frontend/src/components/Layout/Header.tsx | 33 ++++++--- frontend/src/utils/Constants.ts | 2 +- 6 files changed, 83 insertions(+), 59 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 6a473645b..1609f1eac 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -223,7 +223,9 @@ const ChatInfoModal: React.FC = ({ defaultMode, ]) ); + toggleReferenceVisibility(); } + const metricsResponse = await Promise.allSettled(metricsPromise); const successresponse = []; for (let index = 0; index < metricsResponse.length; index++) { @@ -280,6 +282,7 @@ const ChatInfoModal: React.FC = ({ modesarray ) ); + toggleReferenceVisibility(); } const metricsResponse = await Promise.allSettled(metricsPromise); toggleMetricsLoading(); @@ -425,13 +428,18 @@ const ChatInfoModal: React.FC = ({ toggleReferenceVisibility={toggleReferenceVisibility} isVisible={ isSingleMode && + !metricsLoading && (isAdditionalMetricsWithSingleMode === false || isAdditionalMetricsWithSingleMode === null) } /> = ({ chatMessages }) => { const { clearHistoryData, messages, 
setMessages, setClearHistoryData } = useMessageContext(); const { setUserCredentials, setConnectionStatus, connectionStatus } = useCredentials(); - const [showBackButton, setShowBackButton]= useReducer((state) => !state, false); + const [showBackButton, setShowBackButton] = useReducer((state) => !state, false); const [openConnection, setOpenConnection] = useState({ openPopUp: false, chunksExists: false, @@ -113,7 +113,12 @@ const ChatContent: React.FC = ({ chatMessages }) => { isChatOnly={true} />
            -
            +
            = ({ chatMessages }) => { ); }; /** -* ChatOnlyComponent -* Wrapper component to provide necessary context and initialize chat functionality. -*/ + * ChatOnlyComponent + * Wrapper component to provide necessary context and initialize chat functionality. + */ const ChatOnlyComponent: React.FC = () => { const location = useLocation(); - const chatMessages = location.state?.messages as Messages[] || []; + const chatMessages = (location.state?.messages as Messages[]) || []; return ( @@ -146,4 +151,4 @@ const ChatOnlyComponent: React.FC = () => { ); }; -export default ChatOnlyComponent; \ No newline at end of file +export default ChatOnlyComponent; diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 0ecae78c3..c97d917b2 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -48,7 +48,7 @@ export default function MultiModeMetrics({ footer: (info) => info.column.id, }), columnHelper.accessor((row) => row.answer_relevancy as number, { - id: 'Relevancy', + id: 'Answer Relevancy', cell: (info) => { return {info.getValue().toFixed(2)}; }, @@ -71,7 +71,7 @@ export default function MultiModeMetrics({ ), }), columnHelper.accessor((row) => row.faithfulness as number, { - id: 'Score', + id: 'Faithfullness', cell: (info) => { return {info.getValue().toFixed(2)}; }, @@ -94,7 +94,7 @@ export default function MultiModeMetrics({ ), }), columnHelper.accessor((row) => row.context_entity_recall_score as number, { - id: 'Recall Score', + id: 'Entity Recall Score', cell: (info) => { return {info.getValue()?.toFixed(2)}; }, diff --git a/frontend/src/components/Graph/GraphPropertiesPanel.tsx b/frontend/src/components/Graph/GraphPropertiesPanel.tsx index 006dfc095..a11399971 100644 --- a/frontend/src/components/Graph/GraphPropertiesPanel.tsx +++ b/frontend/src/components/Graph/GraphPropertiesPanel.tsx @@ -15,47 +15,47 @@ const 
GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanel const filteredProperties = inspectedItemType === 'node' ? Object.entries((inspectedItem as BasicNode).properties) - .filter(([, value]) => value !== null && value !== undefined && value !== ' ') - .reduce((acc, [key, value]) => { - acc[key] = value; - return acc; - }, {} as Record) + .filter(([, value]) => value !== null && value !== undefined && value !== ' ') + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record) : {}; const properties = inspectedItemType === 'node' ? [ - { - key: '', - value: `${(inspectedItem as BasicNode).id}`, - type: 'String', - }, - ...Object.keys(filteredProperties).map((key) => { - const value = filteredProperties[key]; - return { key, value }; - }), - ] + { + key: '', + value: `${(inspectedItem as BasicNode).id}`, + type: 'String', + }, + ...Object.keys(filteredProperties).map((key) => { + const value = filteredProperties[key]; + return { key, value }; + }), + ] : [ - { - key: '', - value: `${(inspectedItem as BasicRelationship).id}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).from}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).to}`, - type: 'String', - }, - { - key: '', - value: `${(inspectedItem as BasicRelationship).caption ?? ''}`, - type: 'String', - }, - ]; + { + key: '', + value: `${(inspectedItem as BasicRelationship).id}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).from}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).to}`, + type: 'String', + }, + { + key: '', + value: `${(inspectedItem as BasicRelationship).caption ?? 
''}`, + type: 'String', + }, + ]; const labelsSorted = useMemo(() => { if (isNode(inspectedItem)) { return [...inspectedItem.labels].sort(sortAlphabetically); diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 36e4022f3..e537251db 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -61,7 +61,7 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const chatUrl = `/chat-only?uri=${encodeURIComponent( uri )}&user=${userName}&password=${encodedPassword}&database=${database}&port=${port}&connectionStatus=${connectionStatus}`; - navigate(chatUrl, { state: { messages, isLoading } });; + navigate(chatUrl, { state: { messages, isLoading } }); } else { const chatUrl = `/chat-only?openModal=true`; window.open(chatUrl, '_blank'); @@ -170,9 +170,17 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti {buttonCaptions.connectToNeo4j} )} - {showBackButton && ( - - + {showBackButton && ( + + + )}
            = ({ chatOnly, deleteOnClick, setOpenConnecti text={tooltips.downloadChat} aria-label='Download Chat' clean - onClick={() => downloadClickHandler( - { conversation: messages }, - downloadLinkRef, - 'graph-builder-conversation.json' - )} + onClick={() => + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + ) + } disabled={messages.length === 1 || getIsLoading(messages)} placement={chatOnly ? 'left' : 'bottom'} label={tooltips.downloadChat} > - + <> "" - + + Date: Fri, 22 Nov 2024 12:45:17 +0000 Subject: [PATCH 258/292] LLM_MODELS --- frontend/Dockerfile | 1 + frontend/nginx/nginx.prod.conf | 1 + 2 files changed, 2 insertions(+) diff --git a/frontend/Dockerfile b/frontend/Dockerfile index d0a258a59..066563b30 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -28,6 +28,7 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \ VITE_CHAT_MODES=$VITE_CHAT_MODES \ VITE_BATCH_SIZE=$VITE_BATCH_SIZE \ + VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_LLM_MODELS_PROD=$VITE_LLM_MODELS_PROD \ yarn run build diff --git a/frontend/nginx/nginx.prod.conf b/frontend/nginx/nginx.prod.conf index ac6c1db82..f58585714 100644 --- a/frontend/nginx/nginx.prod.conf +++ b/frontend/nginx/nginx.prod.conf @@ -7,6 +7,7 @@ server { script-src 'self' 'unsafe-inline' https://accounts.google.com/gsi/client; default-src 'self' *.${VITE_FRONTEND_HOSTNAME} data:; style-src 'self' *.googleapis.com 'unsafe-inline';" always ; + gzip on; location / { root /usr/share/nginx/html; index index.html index.htm; From 33354f326b36c11805616d28417f5afd076e85ca Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:01:30 +0530 Subject: [PATCH 259/292] re process feature state renaming (#898) * Status Change From Reprocess to Ready To Reprocess * Added the description * text changes --- backend/score.py | 2 +- backend/src/main.py | 
2 +- docs/backend/backend_docs.adoc | 10 +++--- frontend/src/components/Content.tsx | 33 +++++++++++-------- frontend/src/components/FileTable.tsx | 4 +-- .../Popups/RetryConfirmation/Index.tsx | 7 +++- frontend/src/utils/Utils.ts | 4 +-- 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/backend/score.py b/backend/score.py index 44073be0c..6d4be0833 100644 --- a/backend/score.py +++ b/backend/score.py @@ -826,7 +826,7 @@ async def retry_processing(uri=Form(), userName=Form(), password=Form(), databas json_obj = {'api_name':'retry_processing', 'db_url':uri, 'userName':userName, 'database':database, 'file_name':file_name,'retry_condition':retry_condition, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - return create_api_response('Success',message=f"Status set to Reprocess for filename : {file_name}") + return create_api_response('Success',message=f"Status set to Ready to Reprocess for filename : {file_name}") except Exception as e: job_status = "Failed" message="Unable to set status to Retry" diff --git a/backend/src/main.py b/backend/src/main.py index dc1cdd620..c5add5343 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -709,7 +709,7 @@ def populate_graph_schema_from_text(text, model, is_schema_description_cheked): def set_status_retry(graph, file_name, retry_condition): graphDb_data_Access = graphDBdataAccess(graph) obj_source_node = sourceNode() - status = "Reprocess" + status = "Ready to Reprocess" obj_source_node.file_name = file_name obj_source_node.status = status obj_source_node.retry_condition = retry_condition diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 2591ac47e..d313a3402 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -923,14 +923,14 @@ The API is used to drop and create the vector index when vector index dimesion a POST /retry_processing ---- -This API is used to 
reprocess cancelled, completed or failed file sources. -Users have 3 options to reprocess files: +This API is used to mark cancelled, completed or failed file sources as 'Ready to Reprocess'. +Users have 3 options when marking files as 'Ready to Reprocess': * Start from begnning - In this condition file will be processed from the begnning i.e. 1st chunk again. * Delete entities and start from begnning - If the file source is already processed and have any existing nodes and relations then those will be deleted and file will be reprocessed from the 1st chunk. * Start from last processed postion - Cancelled or failed files will be processed from the last successfully processed chunk position. This option is not available for completed files. -Ones the status is set to 'Reprocess', user can again click on Generate graph to process the file for knowledge graph creation. +Once the status is set to 'Ready to Reprocess', the user can click on Generate graph again to process the file for knowledge graph creation. **API Parameters :** @@ -938,7 +938,7 @@ Ones the status is set to 'Reprocess', user can again click on Generate graph to * `userName`= Neo4j db username, * `password`= Neo4j db password, * `database`= Neo4j database name, -* `file_name`= Name of the file which user want to reprocess. +* `file_name`= Name of the file which the user wants to reprocess. * `retry_condition` = One of the above 3 conditions which is selected for reprocessing. @@ -947,7 +947,7 @@ Ones the status is set to 'Reprocess', user can again click on Generate graph to .... { "status": "Success", "message": "Status set to Ready to Reprocess for filename : $filename" } .... 
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 97e2dcde6..12b0705bf 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -261,7 +261,9 @@ const Content: React.FC = ({ return { ...curfile, model: - curfile.status === 'New' || curfile.status === 'Reprocess' ? selectedOption?.value ?? '' : curfile.model, + curfile.status === 'New' || curfile.status === 'Ready to Reprocess' + ? selectedOption?.value ?? '' + : curfile.model, }; }); }); @@ -416,7 +418,7 @@ const Content: React.FC = ({ showNormalToast(`Processing ${batch.length} files at a time.`); for (let i = 0; i < batch.length; i++) { if (newCheck) { - if (batch[i]?.status === 'New' || batch[i].status === 'Reprocess') { + if (batch[i]?.status === 'New' || batch[i].status === 'Ready to Reprocess') { data.push(extractData(batch[i].id, isSelectedFiles, selectedFiles as CustomFile[])); } } else { @@ -537,7 +539,7 @@ const Content: React.FC = ({ } else { const selectedNewFiles = childRef.current ?.getSelectedRows() - .filter((f) => f.status === 'New' || f.status == 'Reprocess'); + .filter((f) => f.status === 'New' || f.status == 'Ready to Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } }; @@ -593,7 +595,7 @@ const Content: React.FC = ({ return f.name === filename ? { ...f, - status: 'Reprocess', + status: 'Ready to Reprocess', processingProgress: isStartFromBegining ? 0 : f.processingProgress, nodesCount: isStartFromBegining ? 0 : f.nodesCount, relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, @@ -621,7 +623,8 @@ const Content: React.FC = ({ ); const newFilecheck = useMemo( - () => childRef.current?.getSelectedRows().filter((f) => f.status === 'New' || f.status == 'Reprocess').length, + () => + childRef.current?.getSelectedRows().filter((f) => f.status === 'New' || f.status == 'Ready to Reprocess').length, [childRef.current?.getSelectedRows()] ); @@ -631,7 +634,7 @@ const Content: React.FC = ({ ); const dropdowncheck = useMemo( - () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting' || f.status === 'Reprocess'), + () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting' || f.status === 'Ready to Reprocess'), [filesData] ); @@ -651,12 +654,12 @@ const Content: React.FC = ({ if (selectedRows?.length) { for (let index = 0; index < selectedRows.length; index++) { const parsedFile: CustomFile = selectedRows[index]; - if (parsedFile.status === 'New' || parsedFile.status == 'Reprocess') { + if (parsedFile.status === 'New' || parsedFile.status == 'Ready to Reprocess') { newstatusfiles.push(parsedFile); } } } else if (filesData.length) { - newstatusfiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); + newstatusfiles = filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess'); } return newstatusfiles; }, [filesData, childRef.current?.getSelectedRows()]); @@ -708,7 +711,7 @@ const Content: React.FC = ({ if ( parsedData.fileSource === 'local file' && typeof parsedData.size === 'number' && - (parsedData.status === 'New' || parsedData.status == 'Reprocess') && + (parsedData.status === 'New' || parsedData.status == 'Ready to Reprocess') && parsedData.size > largeFileSize ) { selectedLargeFiles.push(parsedData); @@ -717,16 +720,20 @@ const Content: React.FC = ({ if (selectedLargeFiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(selectedRows.filter((f) => f.status === 'New' || f.status === 'Reprocess')); + 
handleGenerateGraph(selectedRows.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess')); } } else if (filesData.length) { const largefiles = filesData.filter((f) => { - if (typeof f.size === 'number' && (f.status === 'New' || f.status == 'Reprocess') && f.size > largeFileSize) { + if ( + typeof f.size === 'number' && + (f.status === 'New' || f.status == 'Ready to Reprocess') && + f.size > largeFileSize + ) { return true; } return false; }); - const selectAllNewFiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); + const selectAllNewFiles = filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess'); const stringified = selectAllNewFiles.reduce((accu, f) => { const key = f.id; // @ts-ignore @@ -737,7 +744,7 @@ const Content: React.FC = ({ if (largefiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess')); + handleGenerateGraph(filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess')); } } }; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 22f48c119..f02e7fb7f 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -173,9 +173,9 @@ const FileTable = forwardRef((props, ref) => { onRetry(info?.row?.id as string)} > diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index 5112ca688..379be9598 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -29,6 +29,9 @@ function RetryConfirmationDialog({ return ( Reprocess Options + + Clicking "Continue" will mark these files as "Ready to Reprocess." To proceed, click “Generate Graph” to start the reprocessing process. 
+ {alertStatus.showAlert && ( @@ -69,7 +72,9 @@ function RetryConfirmationDialog({ { diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index beba5e3d1..7314c55e7 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -69,7 +69,7 @@ export const statusCheck = (status: string) => { return 'danger'; case 'Upload Failed': return 'danger'; - case 'Reprocess': + case 'Ready to Reprocess': return 'info'; default: return 'unknown'; @@ -378,7 +378,7 @@ export const getFileSourceStatus = (item: SourceNode) => { if (item?.fileSource === 'local file') { return item?.status; } - if (item?.status === 'Completed' || item.status === 'Failed' || item.status === 'Reprocess') { + if (item?.status === 'Completed' || item.status === 'Failed' || item.status === 'Ready to Reprocess') { return item?.status; } if ( From 9e8ffca977b450c21667ceddba74288e4e87a18d Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Mon, 25 Nov 2024 20:06:38 +0530 Subject: [PATCH 260/292] Community Counts after post processing (#890) * Community count updated in post processing api * Community count query changed * API integration for communities post counts * node and relationships count * filename check * show communities in popover only if its GDS * Code segregation --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> --- backend/score.py | 9 +- backend/src/shared/constants.py | 144 ++++++++++--------- frontend/src/components/BreakDownPopOver.tsx | 33 +++++ frontend/src/components/Content.tsx | 70 ++++++++- frontend/src/components/FileTable.tsx | 34 +---- frontend/src/components/UI/CustomPopOver.tsx | 15 ++ 6 files changed, 199 insertions(+), 106 deletions(-) create mode 100644 frontend/src/components/BreakDownPopOver.tsx create mode 100644 frontend/src/components/UI/CustomPopOver.tsx diff --git a/backend/score.py b/backend/score.py 
index 6d4be0833..ac2468066 100644 --- a/backend/score.py +++ b/backend/score.py @@ -317,6 +317,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database try: graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) + count_response = "" start = time.time() if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) @@ -342,12 +343,14 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database graphDb_data_Access = graphDBdataAccess(graph) document_name = "" count_response = graphDb_data_Access.update_node_relationship_count(document_name) - logging.info(f'Updated source node with community related counts') + if count_response: + count_response = [{"filename": filename, **counts} for filename, counts in count_response.items()] + logging.info(f'Updated source node with community related counts') end = time.time() elapsed_time = end - start json_obj = {'api_name': api_name, 'db_url': uri, 'userName':userName, 'database':database, 'tasks':tasks, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} - logger.log_struct(json_obj) - return create_api_response('Success', message='All tasks completed successfully') + # logger.log_struct(json_obj) + return create_api_response('Success', data=count_response, message='All tasks completed successfully') except Exception as e: job_status = "Failed" diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 2bb7848ee..bf96532d9 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -175,83 +175,89 @@ """ NODEREL_COUNT_QUERY_WITH_COMMUNITY = """ -MATCH docs = (d:Document) +MATCH (d:Document) WHERE d.fileName IS NOT NULL -CALL (d) { - OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) - OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) - OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) - 
OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) - WITH d, count(distinct c) AS chunkNodeCount, - count(distinct po) AS partOfRelCount, - count(distinct he) AS hasEntityRelCount, - count(distinct sim) AS similarRelCount, - count(distinct nc) AS nextChunkRelCount, - count(distinct e) AS entityNodeCount, - collect(distinct e) AS entities - CALL (entities) { - UNWIND entities AS e - RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount - } - CALL (entities) { - UNWIND entities AS e - OPTIONAL MATCH (e)-[ic:IN_COMMUNITY]->(comm:__Community__) - WITH collect(distinct comm) AS base_communities, collect(distinct ic) AS inCommunityRels - // Step 2: Find first-level parents and relationships - UNWIND base_communities AS base_comm - OPTIONAL MATCH (base_comm)-[pc1:PARENT_COMMUNITY]->(first_level:__Community__) - WITH base_communities, inCommunityRels, collect(distinct first_level) AS first_level_parents, - collect(distinct pc1) AS parentCommunityRels1 - // Step 3: Find second-level parents and relationships - UNWIND first_level_parents AS first_level - OPTIONAL MATCH (first_level)-[pc2:PARENT_COMMUNITY]->(second_level:__Community__) - WITH base_communities, inCommunityRels, first_level_parents, parentCommunityRels1, - collect(distinct second_level) AS second_level_parents, collect(distinct pc2) AS parentCommunityRels2 - // Step 4: Find third-level parents and relationships - UNWIND second_level_parents AS second_level - OPTIONAL MATCH (second_level)-[pc3:PARENT_COMMUNITY]->(third_level:__Community__) - WITH base_communities, inCommunityRels, first_level_parents, second_level_parents, parentCommunityRels1, parentCommunityRels2, - collect(distinct third_level) AS third_level_parents, collect(distinct pc3) AS parentCommunityRels3 - // Aggregate all communities and relationships - WITH - base_communities + coalesce(first_level_parents, []) + coalesce(second_level_parents, []) + coalesce(third_level_parents, []) AS all_communities, - 
inCommunityRels + coalesce(parentCommunityRels1, []) + coalesce(parentCommunityRels2, []) + coalesce(parentCommunityRels3, []) AS all_rels - RETURN size(all_communities) AS communityNodeCount, size(all_rels) AS communityRelCount - } - RETURN d.fileName AS filename, chunkNodeCount, - partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, - entityNodeCount, entityEntityRelCount, communityNodeCount, communityRelCount +OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) +OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) +OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) +OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) +OPTIONAL MATCH (e)-[ic:IN_COMMUNITY]->(comm:__Community__) +OPTIONAL MATCH (comm)-[pc1:PARENT_COMMUNITY]->(first_level:__Community__) +OPTIONAL MATCH (first_level)-[pc2:PARENT_COMMUNITY]->(second_level:__Community__) +OPTIONAL MATCH (second_level)-[pc3:PARENT_COMMUNITY]->(third_level:__Community__) +WITH + d.fileName AS filename, + count(DISTINCT c) AS chunkNodeCount, + count(DISTINCT po) AS partOfRelCount, + count(DISTINCT he) AS hasEntityRelCount, + count(DISTINCT sim) AS similarRelCount, + count(DISTINCT nc) AS nextChunkRelCount, + count(DISTINCT e) AS entityNodeCount, + collect(DISTINCT e) AS entities, + count(DISTINCT comm) AS baseCommunityCount, + count(DISTINCT first_level) AS firstlevelcommCount, + count(DISTINCT second_level) AS secondlevelcommCount, + count(DISTINCT third_level) AS thirdlevelcommCount, + count(DISTINCT ic) AS inCommunityCount, + count(DISTINCT pc1) AS parentCommunityRelCount1, + count(DISTINCT pc2) AS parentCommunityRelCount2, + count(DISTINCT pc3) AS parentCommunityRelCount3 +WITH + filename, + chunkNodeCount, + partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, + entityNodeCount, + entities, + baseCommunityCount + firstlevelcommCount + secondlevelcommCount + thirdlevelcommCount AS commCount, + inCommunityCount + parentCommunityRelCount1 + parentCommunityRelCount2 + 
parentCommunityRelCount3 AS communityRelCount +CALL (entities) { + UNWIND entities AS e + RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount } -RETURN filename, chunkNodeCount, chunkRelCount, entityNodeCount, entityEntityRelCount, communityNodeCount, communityRelCount -ORDER BY d.createdAt DESC +RETURN + filename, + COALESCE(chunkNodeCount, 0) AS chunkNodeCount, + COALESCE(chunkRelCount, 0) AS chunkRelCount, + COALESCE(entityNodeCount, 0) AS entityNodeCount, + COALESCE(entityEntityRelCount, 0) AS entityEntityRelCount, + COALESCE(commCount, 0) AS communityNodeCount, + COALESCE(communityRelCount, 0) AS communityRelCount """ NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY = """ -MATCH docs = (d:Document) +MATCH (d:Document) WHERE d.fileName = $document_name -CALL (d) { - OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) - OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) - OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) - OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) - WITH d, count(distinct c) AS chunkNodeCount, - count(distinct po) AS partOfRelCount, - count(distinct he) AS hasEntityRelCount, - count(distinct sim) AS similarRelCount, - count(distinct nc) AS nextChunkRelCount, - count(distinct e) AS entityNodeCount, - collect(distinct e) AS entities - CALL (entities) { - UNWIND entities AS e - RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount - } - RETURN d.fileName AS filename, chunkNodeCount, - partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, - entityNodeCount, entityEntityRelCount +OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) +OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) +OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) +OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) +WITH + d.fileName AS filename, + count(DISTINCT c) AS chunkNodeCount, + count(DISTINCT po) AS partOfRelCount, + count(DISTINCT he) AS hasEntityRelCount, + count(DISTINCT sim) AS similarRelCount, + 
count(DISTINCT nc) AS nextChunkRelCount, + count(DISTINCT e) AS entityNodeCount, + collect(DISTINCT e) AS entities +WITH + filename, + chunkNodeCount, + partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, + entityNodeCount, + entities +CALL (entities) { + UNWIND entities AS e + RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount } -RETURN filename, chunkNodeCount, chunkRelCount, entityNodeCount, entityEntityRelCount -ORDER BY d.createdAt DESC +RETURN + filename, + COALESCE(chunkNodeCount, 0) AS chunkNodeCount, + COALESCE(chunkRelCount, 0) AS chunkRelCount, + COALESCE(entityNodeCount, 0) AS entityNodeCount, + COALESCE(entityEntityRelCount, 0) AS entityEntityRelCount """ + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 diff --git a/frontend/src/components/BreakDownPopOver.tsx b/frontend/src/components/BreakDownPopOver.tsx new file mode 100644 index 000000000..4905497e3 --- /dev/null +++ b/frontend/src/components/BreakDownPopOver.tsx @@ -0,0 +1,33 @@ +import CustomPopOver from './UI/CustomPopOver'; +import { IconButton } from '@neo4j-ndl/react'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import { CustomFileBase } from '../types'; +import { useCredentials } from '../context/UserCredentials'; + +export default function BreakDownPopOver({ file, isNodeCount = true }: { file: CustomFileBase; isNodeCount: boolean }) { + const { isGdsActive } = useCredentials(); + + return ( + + + + } + > + {isNodeCount ? ( +
              +
            • Chunk Nodes: {file.chunkNodeCount}
            • +
            • Entity Nodes: {file.entityNodeCount}
            • + {isGdsActive &&
            • Community Nodes: {file.communityNodeCount}
            • } +
            + ) : ( +
              +
            • Chunk Relations: {file.chunkRelCount}
            • +
            • Entity Relations: {file.entityEntityRelCount}
            • + {isGdsActive &&
            • Community Relations: {file.communityRelCount}
            • } +
            + )} +
            + ); +} diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 12b0705bf..552dc9506 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -177,8 +177,39 @@ const Content: React.FC = ({ if (processedCount === 1 && queue.isEmpty()) { (async () => { showNormalToast(); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + try { + const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + if (response.data.status === 'Success') { + const communityfiles = response.data.data; + communityfiles.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 
0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); + }); + }); + showSuccessToast('All Q&A functionality is available now.'); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + showSuccessToast(error.message); + } + } })(); } }, [processedCount, userCredentials, queue, isReadOnlyUser, isGdsActive]); @@ -431,8 +462,39 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { showNormalToast(); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + try { + const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + if (response.data.status === 'Success') { + const communityfiles = response.data.data; + communityfiles.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 
0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); + }); + }); + showSuccessToast('All Q&A functionality is available now.'); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + showSuccessToast(error.message); + } + } } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index f02e7fb7f..53903d032 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -3,7 +3,6 @@ import { DataGridComponents, Flex, IconButton, - Popover, ProgressBar, StatusIndicator, TextLink, @@ -42,7 +41,6 @@ import { ClipboardDocumentIconSolid, MagnifyingGlassCircleIconSolid, DocumentTextIconSolid, - InformationCircleIconOutline, } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; @@ -55,6 +53,7 @@ import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; import { showErrorToast, showNormalToast } from '../utils/toasts'; import { ThemeWrapperContext } from '../context/ThemeWrapper'; +import BreakDownPopOver from './BreakDownPopOver'; let onlyfortheFirstRender = true; @@ -513,25 +512,13 @@ const FileTable = forwardRef((props, ref) => { info.row.original.chunkNodeCount > 0 || info.row.original.communityNodeCount > 0 || info.row.original.entityNodeCount > 0; + return ( {info.getValue()} {hasNodeBreakDownValues && (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( - - - - - - - -
              -
            • Chunk Nodes: {info.row.original.chunkNodeCount}
            • -
            • Entity Nodes: {info.row.original.entityNodeCount}
            • -
            • Community Nodes: {info.row.original.communityNodeCount}
            • -
            -
            -
            + )}
            ); @@ -551,20 +538,7 @@ const FileTable = forwardRef((props, ref) => { {info.getValue()} {hasRelationsBreakDownValues && (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( - - - - - - - -
              -
            • Chunk Relations: {info.row.original.chunkRelCount}
            • -
            • Entity Relations: {info.row.original.entityEntityRelCount}
            • -
            • Community Relations: {info.row.original.communityRelCount}
            • -
            -
            -
            + )} ); diff --git a/frontend/src/components/UI/CustomPopOver.tsx b/frontend/src/components/UI/CustomPopOver.tsx new file mode 100644 index 000000000..a15a9d36b --- /dev/null +++ b/frontend/src/components/UI/CustomPopOver.tsx @@ -0,0 +1,15 @@ +import { Popover } from '@neo4j-ndl/react'; +import { FunctionComponent, ReactNode } from 'react'; +type Props = { + children: ReactNode; + Trigger: ReactNode; +}; +const CustomPopOver: FunctionComponent = ({ children, Trigger }) => { + return ( + + {Trigger} + {children} + + ); +}; +export default CustomPopOver; From 8af533cd149f7534b5dccb6dd8f6d5969a27bf34 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 25 Nov 2024 20:07:05 +0530 Subject: [PATCH 261/292] format and checked fixes (#897) --- frontend/src/components/ChatBot/Chatbot.tsx | 12 ++++++------ .../Deduplication/index.tsx | 4 ++-- .../SelectedJobList.tsx | 4 ++-- .../PostProcessingCheckList/index.tsx | 11 ++++------- frontend/src/context/UsersFiles.tsx | 19 ++++++++++++------- frontend/src/utils/Utils.ts | 2 +- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 03d027d04..97efb8058 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? 
{ ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg) + : msg ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg) + : msg ) ); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 97c584b7c..f1d469cf2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? 
{ ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 447b1f846..fc5f39206 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive + isGdsActive ? postProcessingTasks.includes('enable_communities') ? postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + : postProcessingTasks.filter((s) => s != 'enable_communities'), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index d5ebc04a2..d2b721995 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -9,6 +9,9 @@ export default function PostProcessingCheckList() { const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const { postProcessingTasks, setPostProcessingTasks } = useFileContext(); const { isGdsActive } = useCredentials(); + const handleCheckboxChange = (jobTitle: string, isChecked: boolean) => { + setPostProcessingTasks((prev) => (isChecked ? [...prev, jobTitle] : prev.filter((task) => task !== jobTitle))); + }; return (
            @@ -37,14 +40,8 @@ export default function PostProcessingCheckList() { ? isGdsActive && postProcessingTasks.includes(job.title) : postProcessingTasks.includes(job.title) } - onChange={(e) => { - if (e.target.checked) { - setPostProcessingTasks((prev) => [...prev, job.title]); - } else { - setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); - } - }} isDisabled={isCreateCommunities && !isGdsActive} + onChange={(e) => handleCheckboxChange(job.title, e.target.checked)} ariaLabel='checkbox-postProcessing' /> {job.description} diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 25f031f95..92f4a4b77 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -16,7 +16,7 @@ const FileContextProvider: FC = ({ children }) => { const selectedNodeLabelstr = localStorage.getItem('selectedNodeLabels'); const selectedNodeRelsstr = localStorage.getItem('selectedRelationshipLabels'); const persistedQueue = localStorage.getItem('waitingQueue'); - const { userCredentials } = useCredentials(); + const { userCredentials, isGdsActive } = useCredentials(); const [files, setFiles] = useState<(File | null)[] | []>([]); const [filesData, setFilesData] = useState([]); const [queue, setQueue] = useState( @@ -35,12 +35,17 @@ const FileContextProvider: FC = ({ children }) => { triggeredFrom: '', show: false, }); - const [postProcessingTasks, setPostProcessingTasks] = useState([ - 'materialize_text_chunk_similarities', - 'enable_hybrid_search_and_fulltext_search_in_bloom', - 'materialize_entity_similarities', - 'enable_communities', - ]); + const [postProcessingTasks, setPostProcessingTasks] = useState(() => { + const tasks = [ + 'materialize_text_chunk_similarities', + 'enable_hybrid_search_and_fulltext_search_in_bloom', + 'materialize_entity_similarities', + ]; + if (isGdsActive) { + tasks.push('enable_communities'); + } + return tasks; + }); const [processedCount, setProcessedCount] = 
useState(0); const [postProcessingVal, setPostProcessingVal] = useState(false); diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 7314c55e7..82217535d 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From 4e203840b6703242930ac4837a80f6d9bbfe96bd Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:08:17 +0530 Subject: [PATCH 262/292] added info to show 50 chunks processing (#899) --- .../src/components/Graph/GraphViewModal.tsx | 23 ++++++++++++------- frontend/src/utils/Constants.ts | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 4fe1f52f5..af5a7a90a 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -17,6 +17,7 @@ import type { Node, Relationship } from '@neo4j-nvl/base'; import { ArrowPathIconOutline, FitToScreenIcon, + InformationCircleIconOutline, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; @@ -62,10 +63,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? 
queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -107,10 +108,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -348,6 +349,12 @@ const GraphViewModal: React.FunctionComponent = ({ > {headerTitle} + {viewPoint !== graphLabels.chatInfoView && (
            + + + + {graphLabels.chunksInfo} +
            )} {checkBoxView && ( Date: Tue, 26 Nov 2024 04:53:43 +0000 Subject: [PATCH 263/292] format and lint fixes --- frontend/src/components/BreakDownPopOver.tsx | 2 +- frontend/src/components/ChatBot/Chatbot.tsx | 12 ++++---- .../src/components/Graph/GraphViewModal.tsx | 30 ++++++++++--------- .../Deduplication/index.tsx | 4 +-- .../SelectedJobList.tsx | 4 +-- .../Popups/RetryConfirmation/Index.tsx | 3 +- frontend/src/utils/Constants.ts | 2 +- frontend/src/utils/Utils.ts | 2 +- 8 files changed, 31 insertions(+), 28 deletions(-) diff --git a/frontend/src/components/BreakDownPopOver.tsx b/frontend/src/components/BreakDownPopOver.tsx index 4905497e3..01ebc4f20 100644 --- a/frontend/src/components/BreakDownPopOver.tsx +++ b/frontend/src/components/BreakDownPopOver.tsx @@ -22,7 +22,7 @@ export default function BreakDownPopOver({ file, isNodeCount = true }: { file: C {isGdsActive &&
          3. Community Nodes: {file.communityNodeCount}
          4. }
      ) : ( -
        +
        • Chunk Relations: {file.chunkRelCount}
        • Entity Relations: {file.entityEntityRelCount}
        • {isGdsActive &&
        • Community Relations: {file.communityRelCount}
        • } diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 97efb8058..03d027d04 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg + : msg) ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg + : msg) ) ); } diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index af5a7a90a..27610680d 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -63,10 +63,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? 
queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -108,10 +108,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -349,12 +349,14 @@ const GraphViewModal: React.FunctionComponent = ({ > {headerTitle} - {viewPoint !== graphLabels.chatInfoView && (
          - - - - {graphLabels.chunksInfo} -
          )} + {viewPoint !== graphLabels.chatInfoView && ( +
          + + + + {graphLabels.chunksInfo} +
          + )} {checkBoxView && ( { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index fc5f39206..447b1f846 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - isGdsActive + (isGdsActive ? postProcessingTasks.includes('enable_communities') ? postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities'), + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index 379be9598..4ae9b18eb 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -30,7 +30,8 @@ function RetryConfirmationDialog({ Reprocess Options - Clicking "Continue" will mark these files as "Ready to Reprocess." To proceed, click “Generate Graph” to start the reprocessing process. + Clicking "Continue" will mark these files as "Ready to Reprocess." To proceed, click “Generate Graph” to start + the reprocessing process. 
{alertStatus.showAlert && ( diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 3488c2764..7cf35f506 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -295,7 +295,7 @@ export const graphLabels = { community: 'Communities', noNodesRels: 'No Nodes and No relationships', neighborView: 'neighborView', - chunksInfo:'We are processing 50 chunks at a time' + chunksInfo: 'We are processing 50 chunks at a time', }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 82217535d..7314c55e7 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - prev === batchSize ? batchSize - 1 : prev + 1; + (prev === batchSize ? batchSize - 1 : prev + 1); export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From f76d6848450bc6129394fd4a3ad99235649b8518 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:07:44 +0530 Subject: [PATCH 264/292] Env changes (#896) * env changes with state management * format and lint fixes * Update frontend_docs.adoc * Update backend_docs.adoc * state changes * error handling * button handling --- backend/score.py | 18 +- docs/backend/backend_docs.adoc | 23 ++ docs/frontend/frontend_docs.adoc | 74 ++++-- .../components/ChatBot/ChatOnlyComponent.tsx | 20 +- frontend/src/components/ChatBot/Chatbot.tsx | 2 +- frontend/src/components/Content.tsx | 207 ++++++++--------- .../src/components/Layout/DrawerDropzone.tsx | 102 ++++----- frontend/src/components/Layout/Header.tsx | 18 
++ frontend/src/components/Layout/PageLayout.tsx | 210 +++++++++++++++++- .../ConnectionModal/ConnectionModal.tsx | 10 +- frontend/src/context/UserCredentials.tsx | 19 +- frontend/src/services/ConnectAPI.ts | 14 +- frontend/src/services/GetFiles.ts | 4 +- frontend/src/types.ts | 9 + 14 files changed, 517 insertions(+), 213 deletions(-) diff --git a/backend/score.py b/backend/score.py index ac2468066..a64d22fca 100644 --- a/backend/score.py +++ b/backend/score.py @@ -557,6 +557,11 @@ def decode_password(pwd): decoded_password = sample_string_bytes.decode("utf-8") return decoded_password +def encode_password(pwd): + data_bytes = pwd.encode('ascii') + encoded_pwd_bytes = base64.b64encode(data_bytes) + return encoded_pwd_bytes + @app.get("/update_extract_status/{file_name}") async def update_extract_status(request:Request, file_name, url, userName, password, database): async def generate(): @@ -960,16 +965,25 @@ async def backend_connection_configuation(): print(f'login connection status of object: {graph}') if graph is not None: graph_connection = True - return create_api_response('Success',message=f"Backend connection successful",data=graph_connection) + isURI = os.getenv('NEO4J_URI') + isUsername= os.getenv('NEO4J_USERNAME') + isDatabase= os.getenv('NEO4J_DATABASE') + isPassword= os.getenv('NEO4J_PASSWORD') + encoded_password = encode_password(isPassword) + graphDb_data_Access = graphDBdataAccess(graph) + gds_status = graphDb_data_Access.check_gds_version() + write_access = graphDb_data_Access.check_account_access(database=isDatabase) + return create_api_response('Success',message=f"Backend connection successful",data={'graph_connection':graph_connection,'uri':isURI,'user_name':isUsername,'database':isDatabase,'password':encoded_password,'gds_status':gds_status,'write_access':write_access}) else: graph_connection = False return create_api_response('Success',message=f"Backend connection is not successful",data=graph_connection) except Exception as e: + 
graph_connection = False job_status = "Failed" message="Unable to connect backend DB" error_message = str(e) logging.exception(f'{error_message}') - return create_api_response(job_status, message=message, error=error_message) + return create_api_response(job_status, message=message, error=error_message + ' or fill from the login dialog', data=graph_connection) finally: gc.collect() diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index d313a3402..7f1fd11dc 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -979,3 +979,26 @@ The API responsible for a evaluating chatbot responses on the basis of different } } .... +=== Backend Database connection +---- +POST /backend_connection_configuation +---- + +The API responsible for create the connection obj from Neo4j DB based on environment variable and return the status for show/hide login dialog on UI + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": true, + "message": "Backend connection successful" +} +.... + +.... +{ + "status": "Failed", + "error": "Could not connect to Neo4j database. Please ensure that the username and password are correct", + "message": "Unable to connect backend DB" +} diff --git a/docs/frontend/frontend_docs.adoc b/docs/frontend/frontend_docs.adoc index 9eaf1e4bc..34e71f254 100644 --- a/docs/frontend/frontend_docs.adoc +++ b/docs/frontend/frontend_docs.adoc @@ -18,53 +18,80 @@ This document provides a comprehensive guide for developers on how we build a Re . 
├── Components | ├─ ChatBot - | | ├─ ChatBotInfoModal + | | ├─ ChatInfoModal | | ├─ ChatModeToggle | | ├─ ExpandedChatButtonContainer + | | ├─ ChatModesSwitch + | | ├─ ChatOnlyComponent + | | ├─ ChatInfo + | | ├─ CommonChatActions + | | ├─ CommunitiesInfo + | | ├─ EntitiesInfo + | | ├─ MetricsCheckbox + | | ├─ MetricsTab + | | ├─ MultiModeMetrics + | | ├─ SourcesInfo | ├─ Data Sources | | ├─ AWS | | ├─ GCS | | ├─ Local - | | ├─ WebSources - | | | ├─Web - | | | ├─Wikipedia - | | | ├─Youtube + | | ├─ Web + | | | ├─ WebButton | ├─ Graph + | | ├─ CheckboxSelection + | | ├─ GraphPropertiesPanel + | | ├─ GraphPropertiesTable | | ├─ GraphViewButton | | ├─ GraphViewModal | | ├─ LegendsChip + | | ├─ ResizePanel + | | ├─ ResultOverview | ├─ Layout - | | ├─ Content + | | ├─ AlertIcon | | ├─ DrawerChatbot | | ├─ DrawerDropzone | | ├─ Header | | ├─ PageLayout | | ├─ SideNav | ├─ Popups + | | ├─ ChunkPopUp | | ├─ ConnectionModal | | ├─ DeletePopup | | ├─ GraphEnhancementDialog | | ├─ LargeFilePopup + | | ├─ RetryConfirmation | | ├─ Settings | ├─ UI | | ├─ Alert | | ├─ ButtonWithTooltip | | ├─ CustomButton - | | ├─ CustomModal + | | ├─ CustomCheckBox + | | ├─ CustomMenu | | ├─ CustomProgressBar - | | ├─ CustomSourceInput - | | ├─ Dropdown + | | ├─ DatabaseIcon + | | ├─ DatabaseStatusIcon | | ├─ ErrorBoundary - | | ├─ FileTable - | | ├─ GenericSourceButton - | | ├─ GenericSourceModal + | | ├─ FallBackDialog | | ├─ HoverableLink | | ├─ IconButtonTooltip | | ├─ Legend - | | ├─ Menu - | | ├─ QuickStarter + | | ├─ ScienceMolecule + | | ├─ ShowAll + | | ├─ TipWrapper + | ├─ Websources + | | ├─ Web + | | ├─ Wikipedia + | | ├─ Youtube + | | ├─ CustomSourceInput + | | ├─ GenericSourceButton + | | ├─ GenericSourceModal + | ├─ Content + | ├─ Dropdown + | ├─ FileTable + | ├─ QuickStarter ├── HOC - | ├─ SettingModalHOC + | ├─ CustomModal + | ├─ withVisibility ├── Assets | ├─ images | | ├─ Application Images @@ -87,8 +114,14 @@ This document provides a comprehensive guide for developers on how 
we build a Re | ├─ constants | ├─ FileAPI | ├─ Loader - | ├─ Types + | ├─ Queue + | ├─ toats | ├─ utils + ├── App + ├── index + ├── main + ├── router + ├── types └── README.md == Application @@ -98,7 +131,10 @@ Added Node.js with version v21.1.0 and npm on the development machine. Install necessary dependencies by running yarn install, such as axios for making HTTP requests and others to interact with the graph. == 2. Connect to the Neo4j Aura instance: -Created a connection modal by adding details including protocol, URI, database name, username, and password. Added a submit button that triggers an API: ***/connect*** and accepts params like uri, password, username and database to establish a connection to the Neo4j Aura instance. Handled the authentication and error scenarios appropriately, by displaying relevant messages. To check whether the backend connection is up and working we hit the API: ***/health*** +Created a connection modal by adding details including protocol, URI, database name, username, and password. Added a submit button that triggers an API: ***/connect*** and accepts params like uri, password, username and database to establish a connection to the Neo4j Aura instance. Handled the authentication and error scenarios appropriately, by displaying relevant messages. To check whether the backend connection is up and working we hit the API: ***/health.*** The user can now access both AURA DS and AURA DB instances. + +* If GDS Connection is there icon is scientific molecule > Graph enhancement model > Post processing jobs > gives user the leverage to check and uncheck the communities checkbox. +* If AURA DB > icon is database icon > Graph enhancement model > Post processing jobs > communities checkbox is disabled. * Before Connection : @@ -241,6 +277,10 @@ User can delete all number/selected files from the table. 
image::images/DeleteFiles.jpg[DeleteFiles, 600] +* ***Chat Only Mode*** + +User can also use the chat only feature by navigating to the url https://dev-frontend-dcavk67s4a-uc.a.run.app/chat-only to ask questions related to documents which have been completely processed. User is required to pass the login credentials to connect to the database. + == 8. Interface Design: Designed a user-friendly interface that guides users through the process of connecting to Neo4j Aura, accessing file sources, uploading PDF files, and generating graphs. diff --git a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx index 45accddcd..6d1e94b59 100644 --- a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx +++ b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx @@ -9,10 +9,11 @@ import Header from '../Layout/Header'; import { clearChatAPI } from '../../services/QnaAPI'; import { ChatProps, connectionState, Messages, UserCredentials } from '../../types'; import { getIsLoading } from '../../utils/Utils'; +import ThemeWrapper from '../../context/ThemeWrapper'; const ChatContent: React.FC = ({ chatMessages }) => { const { clearHistoryData, messages, setMessages, setClearHistoryData } = useMessageContext(); - const { setUserCredentials, setConnectionStatus, connectionStatus } = useCredentials(); + const { setUserCredentials, setConnectionStatus, connectionStatus, setShowDisconnectButton } = useCredentials(); const [showBackButton, setShowBackButton] = useReducer((state) => !state, false); const [openConnection, setOpenConnection] = useState({ openPopUp: false, @@ -58,6 +59,7 @@ const ChatContent: React.FC = ({ chatMessages }) => { */ const handleConnectionSuccess = () => { setConnectionStatus(true); + setShowDisconnectButton(true); setOpenConnection((prev) => ({ ...prev, openPopUp: false })); const urlParams = new URLSearchParams(window.location.search); urlParams.delete('openModal'); @@ -142,13 +144,15 @@ const 
ChatOnlyComponent: React.FC = () => { const location = useLocation(); const chatMessages = (location.state?.messages as Messages[]) || []; return ( - - - - - - - + + + + + + + + + ); }; export default ChatOnlyComponent; diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 03d027d04..4cf1b12bb 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -411,7 +411,7 @@ const Chatbot: FC = (props) => {
          diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 552dc9506..48ef941ee 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -12,13 +12,12 @@ import { OptionType, UserCredentials, chunkdata, - connectionState, } from '../types'; import deleteAPI from '../services/DeleteFiles'; import { postProcessing } from '../services/PostProcessing'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; -import { useSearchParams } from 'react-router-dom'; +// import { useSearchParams } from 'react-router-dom'; import { batchSize, buttonCaptions, @@ -30,7 +29,6 @@ import { tooltips, } from '../utils/Constants'; import ButtonWithToolTip from './UI/ButtonWithToolTip'; -import connectAPI from '../services/ConnectAPI'; import DropdownComponent from './Dropdown'; import GraphViewModal from './Graph/GraphViewModal'; import { lazy } from 'react'; @@ -48,7 +46,6 @@ import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingC import { getChunkText } from '../services/getChunkText'; import ChunkPopUp from './Popups/ChunkPopUp'; -const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); let afterFirstRender = false; @@ -60,29 +57,17 @@ const Content: React.FC = ({ setIsSchema, showEnhancementDialog, toggleEnhancementDialog, + setOpenConnection, + showDisconnectButton, + connectionStatus, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [init, setInit] = useState(false); - const [openConnection, setOpenConnection] = useState({ - openPopUp: false, - chunksExists: false, - vectorIndexMisMatch: false, - chunksExistsWithDifferentDimension: false, - }); + // const [init, setInit] = 
useState(false); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); const [documentName, setDocumentName] = useState(''); - const { - setUserCredentials, - userCredentials, - connectionStatus, - setConnectionStatus, - isGdsActive, - setGdsActive, - setIsReadOnlyUser, - isReadOnlyUser, - } = useCredentials(); + const { setUserCredentials, userCredentials, setConnectionStatus, isGdsActive, isReadOnlyUser } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -121,7 +106,7 @@ const Content: React.FC = ({ ); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); - const [searchParams] = useSearchParams(); + // const [searchParams] = useSearchParams(); const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { @@ -141,32 +126,32 @@ const Content: React.FC = ({ setCurrentPage((prev) => prev - 1); await getChunks(documentName, currentPage - 1); }; - useEffect(() => { - if (!init && !searchParams.has('connectURL')) { - let session = localStorage.getItem('neo4j.connection'); - if (session) { - let neo4jConnection = JSON.parse(session); - setUserCredentials({ - uri: neo4jConnection.uri, - userName: neo4jConnection.user, - password: atob(neo4jConnection.password), - database: neo4jConnection.database, - port: neo4jConnection.uri.split(':')[2], - }); - if (neo4jConnection.isgdsActive !== undefined) { - setGdsActive(neo4jConnection.isgdsActive); - } - if (neo4jConnection.isReadOnlyUser !== undefined) { - setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); - } - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } - setInit(true); - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } - }, []); + // useEffect(() => { + // 
if (!init && !searchParams.has('connectURL')) { + // let session = localStorage.getItem('neo4j.connection'); + // if (session) { + // let neo4jConnection = JSON.parse(session); + // setUserCredentials({ + // uri: neo4jConnection.uri, + // userName: neo4jConnection.user, + // password: atob(neo4jConnection.password), + // database: neo4jConnection.database, + // port: neo4jConnection.uri.split(':')[2], + // }); + // if (neo4jConnection.isgdsActive !== undefined) { + // setGdsActive(neo4jConnection.isgdsActive); + // } + // if (neo4jConnection.isReadOnlyUser !== undefined) { + // setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); + // } + // } else { + // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + // } + // setInit(true); + // } else { + // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + // } + // }, []); useEffect(() => { if (afterFirstRender) { localStorage.setItem('processedCount', JSON.stringify({ db: userCredentials?.uri, count: processedCount })); @@ -228,60 +213,60 @@ const Content: React.FC = ({ } }, [isSchema]); - useEffect(() => { - const connection = localStorage.getItem('neo4j.connection'); - if (connection != null) { - (async () => { - const parsedData = JSON.parse(connection); - const response = await connectAPI( - parsedData.uri, - parsedData.user, - atob(parsedData.password), - parsedData.database - ); - if (response?.data?.status === 'Success') { - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - ...parsedData, - userDbVectorIndex: response.data.data.db_vector_dimension, - password: btoa(atob(parsedData.password)), - }) - ); - if (response.data.data.gds_status !== undefined) { - setGdsActive(response.data.data.gds_status); - } - if (response.data.data.write_access !== undefined) { - setIsReadOnlyUser(!response.data.data.write_access); - } - if ( - (response.data.data.application_dimension === response.data.data.db_vector_dimension || - response.data.data.db_vector_dimension == 0) && - 
!response.data.data.chunks_exists - ) { - setConnectionStatus(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: false })); - } else { - setOpenConnection({ - openPopUp: true, - chunksExists: response.data.data.chunks_exists as boolean, - vectorIndexMisMatch: - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension, - chunksExistsWithDifferentDimension: - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension && - (response.data.data.chunks_exists ?? true), - }); - setConnectionStatus(false); - } - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setConnectionStatus(false); - } - })(); - } - }, []); + // useEffect(() => { + // const connection = localStorage.getItem('neo4j.connection'); + // if (connection != null) { + // (async () => { + // const parsedData = JSON.parse(connection); + // const response = await connectAPI( + // parsedData.uri, + // parsedData.user, + // atob(parsedData.password), + // parsedData.database + // ); + // if (response?.data?.status === 'Success') { + // localStorage.setItem( + // 'neo4j.connection', + // JSON.stringify({ + // ...parsedData, + // userDbVectorIndex: response.data.data.db_vector_dimension, + // password: btoa(atob(parsedData.password)), + // }) + // ); + // if (response.data.data.gds_status !== undefined) { + // setGdsActive(response.data.data.gds_status); + // } + // if (response.data.data.write_access !== undefined) { + // setIsReadOnlyUser(!response.data.data.write_access); + // } + // if ( + // (response.data.data.application_dimension === response.data.data.db_vector_dimension || + // response.data.data.db_vector_dimension == 0) && + // !response.data.data.chunks_exists + // ) { + // setConnectionStatus(true); + // setOpenConnection((prev) => ({ ...prev, openPopUp: false })); + // } else { + // setOpenConnection({ + // openPopUp: true, 
+ // chunksExists: response.data.data.chunks_exists as boolean, + // vectorIndexMisMatch: + // response.data.data.db_vector_dimension > 0 && + // response.data.data.db_vector_dimension != response.data.data.application_dimension, + // chunksExistsWithDifferentDimension: + // response.data.data.db_vector_dimension > 0 && + // response.data.data.db_vector_dimension != response.data.data.application_dimension && + // (response.data.data.chunks_exists ?? true), + // }); + // setConnectionStatus(false); + // } + // } else { + // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + // setConnectionStatus(false); + // } + // })(); + // } + // }, []); const handleDropdownChange = (selectedOption: OptionType | null | void) => { if (selectedOption?.value) { @@ -880,16 +865,6 @@ const Content: React.FC = ({ )}
          - }> - -
          Neo4j connection {isReadOnlyUser ? '(Read only Mode)' : ''} @@ -944,9 +919,11 @@ const Content: React.FC = ({ {buttonCaptions.connectToNeo4j} ) : ( - + showDisconnectButton && ( + + ) )}
          diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index 6560bc2ad..f84910d3d 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -1,7 +1,6 @@ import { Drawer, Flex, StatusIndicator, Typography } from '@neo4j-ndl/react'; import DropZone from '../DataSources/Local/DropZone'; -import React, { useState, useEffect, useMemo, Suspense, lazy } from 'react'; -import { healthStatus } from '../../services/HealthStatus'; +import React, { useMemo, Suspense, lazy } from 'react'; import S3Component from '../DataSources/AWS/S3Bucket'; import { DrawerProps } from '../../types'; import GCSButton from '../DataSources/GCS/GCSButton'; @@ -24,21 +23,8 @@ const DrawerDropzone: React.FC = ({ showGCSModal, showGenericModal, }) => { - const [isBackendConnected, setIsBackendConnected] = useState(false); const { closeAlert, alertState } = useAlertContext(); - const { isReadOnlyUser } = useCredentials(); - - useEffect(() => { - async function getHealthStatus() { - try { - const response = await healthStatus(); - setIsBackendConnected(response.data.healthy); - } catch (error) { - setIsBackendConnected(false); - } - } - getHealthStatus(); - }, []); + const { isReadOnlyUser, isBackendConnected } = useCredentials(); const isYoutubeOnlyCheck = useMemo( () => APP_SOURCES?.includes('youtube') && !APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('web'), @@ -70,9 +56,8 @@ const DrawerDropzone: React.FC = ({
          {process.env.VITE_ENV != 'PROD' && ( @@ -92,31 +77,29 @@ const DrawerDropzone: React.FC = ({
          )} {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( <> {(APP_SOURCES.includes('youtube') || APP_SOURCES.includes('wiki') || APP_SOURCES.includes('web')) && ( -
          - - -
          - )} +
          + + +
          + )} {APP_SOURCES.includes('s3') && (
          }> @@ -126,9 +109,8 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES.includes('gcs') && (
          }> @@ -157,27 +139,26 @@ const DrawerDropzone: React.FC = ({ {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( -
          - - -
          - )} +
          + + +
          + )} {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( <> {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && (
          }> @@ -187,9 +168,8 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && (
          = ({ chatOnly, deleteOnClick, setOpenConnecti )} + + {colorMode === 'dark' ? ( + + + + ) : ( + + + + )} +
          { diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 31f3ba8cb..5cfe7bd12 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -1,22 +1,33 @@ -import { useReducer, useState } from 'react'; +import { lazy, Suspense, useEffect, useReducer, useState } from 'react'; import SideNav from './SideNav'; import DrawerDropzone from './DrawerDropzone'; import DrawerChatbot from './DrawerChatbot'; import Content from '../Content'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; -import { UserCredentials } from '../../types'; +import { connectionState, UserCredentials } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; import { useMediaQuery } from '@mui/material'; import { useFileContext } from '../../context/UsersFiles'; import SchemaFromTextDialog from '../Popups/Settings/SchemaFromText'; import useSpeechSynthesis from '../../hooks/useSpeech'; +import FallBackDialog from '../UI/FallBackDialog'; +import { envConnectionAPI } from '../../services/ConnectAPI'; +import { healthStatus } from '../../services/HealthStatus'; + +const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); interface PageLayoutProp { isChatOnly?: boolean; } const PageLayout: React.FC = () => { + const [openConnection, setOpenConnection] = useState({ + openPopUp: false, + chunksExists: false, + vectorIndexMisMatch: false, + chunksExistsWithDifferentDimension: false, + }); const largedesktops = useMediaQuery(`(min-width:1440px )`); const { userCredentials, connectionStatus } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); @@ -27,6 +38,7 @@ const PageLayout: React.FC = () => { const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => 
!s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); + const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); @@ -44,7 +56,188 @@ const PageLayout: React.FC = () => { const { messages, setClearHistoryData, clearHistoryData, setMessages } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); + const { + setConnectionStatus, + setGdsActive, + setIsReadOnlyUser, + setIsBackendConnected, + setUserCredentials, + setErrorMessage, + setShowDisconnectButton, + showDisconnectButton, + } = useCredentials(); const { cancel } = useSpeechSynthesis(); + + + useEffect(() => { + async function initializeConnection() { + const session = localStorage.getItem('neo4j.connection'); + const environment = process.env.VITE_ENV; + const isDev = environment === 'DEV'; + // Fetch backend health status + try { + const response = await healthStatus(); + setIsBackendConnected(response.data.healthy); + } catch (error) { + setIsBackendConnected(false); + } + // To set the disconnect button state + const handleDisconnectButtonState = (isModalOpen: boolean) => { + setShowDisconnectButton(isModalOpen); + localStorage.setItem('disconnectButtonState', isModalOpen ? 
'true' : 'false'); + } + // To parse and set user credentials from session + const setUserCredentialsFromSession = (neo4jConnection: string) => { + if (!neo4jConnection) { + console.error('Invalid session data:', neo4jConnection); + return; + } + try { + const parsedConnection = JSON.parse(neo4jConnection); + if ( + parsedConnection.uri && + parsedConnection.user && + parsedConnection.password && + parsedConnection.database && + typeof parsedConnection.uri === 'string' && + typeof parsedConnection.user === 'string' && + typeof parsedConnection.password === 'string' && + typeof parsedConnection.database === 'string' + ) { + setUserCredentials({ + uri: parsedConnection.uri, + userName: parsedConnection.user, + password: atob(parsedConnection.password), + database: parsedConnection.database, + }); + setGdsActive(parsedConnection.isGDS); + //setIsReadOnlyUser(parsedConnection.isReadOnlyUser || false); + } else { + console.error('Invalid parsed session data:', parsedConnection); + } + } catch (error) { + console.error('Failed to parse session data:', error); + } + } + // To update credentials if environment values differ + const updateSessionIfNeeded = (envCredentials: UserCredentials, storedSession: string) => { + try { + const storedCredentials = JSON.parse(storedSession); + const isDiffCreds = + envCredentials.uri !== storedCredentials.uri || + envCredentials.userName !== storedCredentials.user || + btoa(envCredentials.password) !== storedCredentials.password || + envCredentials.database !== storedCredentials.database; + if (isDiffCreds) { + setUserCredentials(envCredentials); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: envCredentials.uri, + user: envCredentials.userName, + password: btoa(envCredentials.password), + database: envCredentials.database, + userDbVectorIndex: 384, + // isReadOnlyUser: envCredentials.isReadonlyUser, + isGDS: envCredentials.isGds, + }) + ); + return true; + } + return false; + } catch (error) { + 
console.error('Failed to update session:', error); + return false; + } + } + try { + // Handle case where session exists + if (session) { + if (isDev) { + let backendApiResponse; + try { + backendApiResponse = await envConnectionAPI(); + const connectionData = backendApiResponse.data; + const envCredentials = { + uri: connectionData.data.uri, + password: atob(connectionData.data.password), + userName: connectionData.data.user_name, + database: connectionData.data.database, + // isReadonlyUser: connectionData.data.write_access, + isGds: connectionData.data.gds_status, + }; + const updated = updateSessionIfNeeded(envCredentials, session); + if (!updated) { + setUserCredentialsFromSession(session); // Using stored session if no update is needed + } + setConnectionStatus(!!connectionData.data.graph_connection); + setIsBackendConnected(true); + handleDisconnectButtonState(false); + } catch (error) { + console.error('Error in DEV session handling:', error); + handleDisconnectButtonState(true); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + setErrorMessage(backendApiResponse?.data.error); + } + } else { + // For PROD, picking the session values + setUserCredentialsFromSession(session); + setConnectionStatus(true); + setIsBackendConnected(true); + handleDisconnectButtonState(true); + } + return; + } + // Handle case where no session exists + if (isDev) { + let envAPiResponse; + try { + envAPiResponse = await envConnectionAPI(); + const connectionData = envAPiResponse.data.data; + const credentials = { + uri: connectionData.uri, + password: atob(connectionData.password), + userName: connectionData.user_name, + database: connectionData.database, + // isReadonlyUser: connectionData.write_access, + isGds: connectionData.gds_status, + }; + setUserCredentials(credentials); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: credentials.uri, + user: credentials.userName, + password: btoa(credentials.password), + database: 
credentials.database, + userDbVectorIndex: 384, + // isReadOnlyUser: credentials.isReadonlyUser, + isGDS: credentials.isGds, + }) + ); + setConnectionStatus(!!connectionData.graph_connection); + setIsBackendConnected(true); + handleDisconnectButtonState(false); + } catch (error) { + console.error('Error in DEV no-session handling:', error); + handleDisconnectButtonState(true); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + setErrorMessage(envAPiResponse?.data.error); + } + } else { + // For PROD: Open modal to manually connect + handleDisconnectButtonState(true); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + setIsBackendConnected(false); + } + } catch (error) { + console.error('Error in initializeConnection:', error); + setIsBackendConnected(false); + } + } + initializeConnection(); + }, []); + const deleteOnClick = async () => { try { setClearHistoryData(true); @@ -79,6 +272,16 @@ const PageLayout: React.FC = () => { return (
          + }> + + = () => { setIsSchema={setIsSchema} showEnhancementDialog={showEnhancementDialog} toggleEnhancementDialog={toggleEnhancementDialog} + setOpenConnection={setOpenConnection} + showDisconnectButton={showDisconnectButton} + connectionStatus={connectionStatus} /> {showDrawerChatbot && ( (initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser, errorMessage } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? undefined); @@ -123,6 +123,12 @@ export default function ConnectionModal({ } }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding, userCredentials]); + useEffect(() => { + if (errorMessage) { + setMessage({ type: 'danger', content: errorMessage }); + } + }, [errorMessage]); + const parseAndSetURI = (uri: string, urlparams = false) => { const uriParts: string[] = uri.split('://'); let uriHost: string[] | string; diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx index cd3db2d62..29eee0c14 100644 --- a/frontend/src/context/UserCredentials.tsx +++ b/frontend/src/context/UserCredentials.tsx @@ -1,4 +1,4 @@ -import { createContext, useState, useContext, FunctionComponent, ReactNode, useReducer } from 'react'; +import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react'; import { ContextProps, UserCredentials } from '../types'; type Props = { @@ -14,6 +14,12 @@ export const UserConnection = createContext({ setConnectionStatus: () => null, isReadOnlyUser: false, setIsReadOnlyUser: () => null, + 
isBackendConnected: false, + setIsBackendConnected: () => null, + errorMessage: '', + setErrorMessage: () => null, + showDisconnectButton: false, + setShowDisconnectButton: () => null, }); export const useCredentials = () => { const userCredentials = useContext(UserConnection); @@ -23,7 +29,10 @@ const UserCredentialsWrapper: FunctionComponent = (props) => { const [userCredentials, setUserCredentials] = useState(null); const [isGdsActive, setGdsActive] = useState(false); const [isReadOnlyUser, setIsReadOnlyUser] = useState(false); - const [connectionStatus, setConnectionStatus] = useReducer((state) => !state, false); + const [connectionStatus, setConnectionStatus] = useState(false); + const [isBackendConnected, setIsBackendConnected] = useState(false); + const [errorMessage, setErrorMessage] = useState(''); + const [showDisconnectButton, setShowDisconnectButton] = useState(false); const value = { userCredentials, setUserCredentials, @@ -33,6 +42,12 @@ const UserCredentialsWrapper: FunctionComponent = (props) => { setConnectionStatus, isReadOnlyUser, setIsReadOnlyUser, + isBackendConnected, + setIsBackendConnected, + errorMessage, + setErrorMessage, + showDisconnectButton, + setShowDisconnectButton, }; return {props.children}; diff --git a/frontend/src/services/ConnectAPI.ts b/frontend/src/services/ConnectAPI.ts index 026c41c44..c802a5946 100644 --- a/frontend/src/services/ConnectAPI.ts +++ b/frontend/src/services/ConnectAPI.ts @@ -18,4 +18,16 @@ const connectAPI = async (connectionURI: string, username: string, password: str throw error; } }; -export default connectAPI; + +const envConnectionAPI = async () => { + try { + const conectionUrl = `/backend_connection_configuation`; + const response = await api.post(conectionUrl); + return response; + } catch (error) { + console.log('API Connection error', error); + throw error; + } +}; + +export { connectAPI, envConnectionAPI }; diff --git a/frontend/src/services/GetFiles.ts b/frontend/src/services/GetFiles.ts index 
056a9cc05..4193a1d6b 100644 --- a/frontend/src/services/GetFiles.ts +++ b/frontend/src/services/GetFiles.ts @@ -3,9 +3,9 @@ import api from '../API/Index'; export const getSourceNodes = async (userCredentials: UserCredentials) => { try { - const encodedstr = btoa(userCredentials.password); + const encodedstr = btoa(userCredentials?.password); const response = await api.get( - `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${userCredentials.userName}&password=${encodedstr}` + `/sources_list?uri=${userCredentials?.uri}&database=${userCredentials?.database}&userName=${userCredentials?.userName}&password=${encodedstr}` ); return response; } catch (error) { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index fcb170604..41b652683 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -152,6 +152,9 @@ export interface ContentProps { setIsSchema: Dispatch>; showEnhancementDialog: boolean; toggleEnhancementDialog: () => void; + setOpenConnection: Dispatch>; + showDisconnectButton: boolean; + connectionStatus: boolean; } export interface FileTableProps { @@ -748,6 +751,12 @@ export interface ContextProps { setIsReadOnlyUser: Dispatch>; connectionStatus: boolean; setConnectionStatus: Dispatch>; + isBackendConnected: boolean; + setIsBackendConnected: Dispatch>; + errorMessage: string; + setErrorMessage: Dispatch>; + showDisconnectButton: boolean; + setShowDisconnectButton: Dispatch>; } export interface MessageContextType { messages: Messages[] | []; From 63325f96fe36e4b45c392627238c314ce491be9d Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:12:07 +0530 Subject: [PATCH 265/292] Update Content.tsx --- frontend/src/components/Content.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 48ef941ee..47614e042 100644 --- 
a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -165,8 +165,8 @@ const Content: React.FC = ({ try { const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); if (response.data.status === 'Success') { - const communityfiles = response.data.data; - communityfiles.forEach((c: any) => { + const communityfiles = response.data?.data; + communityfiles?.forEach((c: any) => { setFilesData((prev) => { return prev.map((f) => { if (f.name === c.filename) { From 1992de7bfb3a1c40f4dc83794c88127b8622062d Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:13:09 +0530 Subject: [PATCH 266/292] Update Content.tsx --- frontend/src/components/Content.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 47614e042..9b39e7b99 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -450,8 +450,8 @@ const Content: React.FC = ({ try { const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); if (response.data.status === 'Success') { - const communityfiles = response.data.data; - communityfiles.forEach((c: any) => { + const communityfiles = response.data?.data; + communityfiles?.forEach((c: any) => { setFilesData((prev) => { return prev.map((f) => { if (f.name === c.filename) { From 3e348743462a1e242d59e837642d849b02f0a9b1 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:56:19 +0000 Subject: [PATCH 267/292] build fix --- frontend/src/components/Layout/PageLayout.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 5cfe7bd12..7f3b75185 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ 
b/frontend/src/components/Layout/PageLayout.tsx @@ -59,7 +59,6 @@ const PageLayout: React.FC = () => { const { setConnectionStatus, setGdsActive, - setIsReadOnlyUser, setIsBackendConnected, setUserCredentials, setErrorMessage, From 22a57c562a645767da4563a1fdaedd5e20fb9212 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 27 Nov 2024 11:33:53 +0000 Subject: [PATCH 268/292] communitifiles array check --- frontend/src/components/Content.tsx | 170 +++++------------- .../src/components/Layout/DrawerDropzone.tsx | 84 +++++---- frontend/src/components/Layout/Header.tsx | 2 +- frontend/src/components/Layout/PageLayout.tsx | 67 ++++--- 4 files changed, 120 insertions(+), 203 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 9b39e7b99..0b27db6b6 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -4,20 +4,11 @@ import { Button, Typography, Flex, StatusIndicator, useMediaQuery } from '@neo4j import { useCredentials } from '../context/UserCredentials'; import { useFileContext } from '../context/UsersFiles'; import { extractAPI } from '../utils/FileAPI'; -import { - BannerAlertProps, - ChildRef, - ContentProps, - CustomFile, - OptionType, - UserCredentials, - chunkdata, -} from '../types'; +import { BannerAlertProps, ChildRef, ContentProps, CustomFile, OptionType, UserCredentials, chunkdata } from '../types'; import deleteAPI from '../services/DeleteFiles'; import { postProcessing } from '../services/PostProcessing'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; -// import { useSearchParams } from 'react-router-dom'; import { batchSize, buttonCaptions, @@ -106,7 +97,6 @@ const Content: React.FC = ({ ); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); - // const 
[searchParams] = useSearchParams(); const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { @@ -126,32 +116,7 @@ const Content: React.FC = ({ setCurrentPage((prev) => prev - 1); await getChunks(documentName, currentPage - 1); }; - // useEffect(() => { - // if (!init && !searchParams.has('connectURL')) { - // let session = localStorage.getItem('neo4j.connection'); - // if (session) { - // let neo4jConnection = JSON.parse(session); - // setUserCredentials({ - // uri: neo4jConnection.uri, - // userName: neo4jConnection.user, - // password: atob(neo4jConnection.password), - // database: neo4jConnection.database, - // port: neo4jConnection.uri.split(':')[2], - // }); - // if (neo4jConnection.isgdsActive !== undefined) { - // setGdsActive(neo4jConnection.isgdsActive); - // } - // if (neo4jConnection.isReadOnlyUser !== undefined) { - // setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); - // } - // } else { - // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - // } - // setInit(true); - // } else { - // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - // } - // }, []); + useEffect(() => { if (afterFirstRender) { localStorage.setItem('processedCount', JSON.stringify({ db: userCredentials?.uri, count: processedCount })); @@ -166,26 +131,28 @@ const Content: React.FC = ({ const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); if (response.data.status === 'Success') { const communityfiles = response.data?.data; - communityfiles?.forEach((c: any) => { - setFilesData((prev) => { - return prev.map((f) => { - if (f.name === c.filename) { - return { - ...f, - chunkNodeCount: c.chunkNodeCount ?? 0, - entityNodeCount: c.entityNodeCount ?? 0, - communityNodeCount: c.communityNodeCount ?? 0, - chunkRelCount: c.chunkRelCount ?? 0, - entityEntityRelCount: c.entityEntityRelCount ?? 0, - communityRelCount: c.communityRelCount ?? 
0, - nodesCount: c.nodeCount, - relationshipsCount: c.relationshipCount, - }; - } - return f; + if (Array.isArray(communityfiles) && communityfiles.length) { + communityfiles?.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); }); }); - }); + } showSuccessToast('All Q&A functionality is available now.'); } else { throw new Error(response.data.error); @@ -213,61 +180,6 @@ const Content: React.FC = ({ } }, [isSchema]); - // useEffect(() => { - // const connection = localStorage.getItem('neo4j.connection'); - // if (connection != null) { - // (async () => { - // const parsedData = JSON.parse(connection); - // const response = await connectAPI( - // parsedData.uri, - // parsedData.user, - // atob(parsedData.password), - // parsedData.database - // ); - // if (response?.data?.status === 'Success') { - // localStorage.setItem( - // 'neo4j.connection', - // JSON.stringify({ - // ...parsedData, - // userDbVectorIndex: response.data.data.db_vector_dimension, - // password: btoa(atob(parsedData.password)), - // }) - // ); - // if (response.data.data.gds_status !== undefined) { - // setGdsActive(response.data.data.gds_status); - // } - // if (response.data.data.write_access !== undefined) { - // setIsReadOnlyUser(!response.data.data.write_access); - // } - // if ( - // (response.data.data.application_dimension === response.data.data.db_vector_dimension || - // response.data.data.db_vector_dimension == 0) && - // !response.data.data.chunks_exists - // ) { - // setConnectionStatus(true); - // setOpenConnection((prev) => ({ ...prev, openPopUp: 
false })); - // } else { - // setOpenConnection({ - // openPopUp: true, - // chunksExists: response.data.data.chunks_exists as boolean, - // vectorIndexMisMatch: - // response.data.data.db_vector_dimension > 0 && - // response.data.data.db_vector_dimension != response.data.data.application_dimension, - // chunksExistsWithDifferentDimension: - // response.data.data.db_vector_dimension > 0 && - // response.data.data.db_vector_dimension != response.data.data.application_dimension && - // (response.data.data.chunks_exists ?? true), - // }); - // setConnectionStatus(false); - // } - // } else { - // setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - // setConnectionStatus(false); - // } - // })(); - // } - // }, []); - const handleDropdownChange = (selectedOption: OptionType | null | void) => { if (selectedOption?.value) { setModel(selectedOption?.value); @@ -451,26 +363,28 @@ const Content: React.FC = ({ const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); if (response.data.status === 'Success') { const communityfiles = response.data?.data; - communityfiles?.forEach((c: any) => { - setFilesData((prev) => { - return prev.map((f) => { - if (f.name === c.filename) { - return { - ...f, - chunkNodeCount: c.chunkNodeCount ?? 0, - entityNodeCount: c.entityNodeCount ?? 0, - communityNodeCount: c.communityNodeCount ?? 0, - chunkRelCount: c.chunkRelCount ?? 0, - entityEntityRelCount: c.entityEntityRelCount ?? 0, - communityRelCount: c.communityRelCount ?? 0, - nodesCount: c.nodeCount, - relationshipsCount: c.relationshipCount, - }; - } - return f; + if (Array.isArray(communityfiles) && communityfiles.length) { + communityfiles?.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 
0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); }); }); - }); + } showSuccessToast('All Q&A functionality is available now.'); } else { throw new Error(response.data.error); diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index f84910d3d..4b9e89dc4 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -56,8 +56,9 @@ const DrawerDropzone: React.FC = ({
          {process.env.VITE_ENV != 'PROD' && ( @@ -77,29 +78,31 @@ const DrawerDropzone: React.FC = ({
          )} {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( <> {(APP_SOURCES.includes('youtube') || APP_SOURCES.includes('wiki') || APP_SOURCES.includes('web')) && ( -
          - - -
          - )} +
          + + +
          + )} {APP_SOURCES.includes('s3') && (
          }> @@ -109,8 +112,9 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES.includes('gcs') && (
          }> @@ -139,26 +143,27 @@ const DrawerDropzone: React.FC = ({ {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && ( -
          - - -
          - )} +
          + + +
          + )} {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( <> {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && (
          }> @@ -168,8 +173,9 @@ const DrawerDropzone: React.FC = ({ )} {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && (
          = ({ chatOnly, deleteOnClick, setOpenConnecti const userName = neo4jConnection.user; const { password } = neo4jConnection; const { database } = neo4jConnection; - const port = uri.split(':')[2]; + const [,port]= uri.split(':'); const encodedPassword = btoa(password); const chatUrl = `/chat-only?uri=${encodeURIComponent( uri diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 7f3b75185..764f32e78 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -67,7 +67,6 @@ const PageLayout: React.FC = () => { } = useCredentials(); const { cancel } = useSpeechSynthesis(); - useEffect(() => { async function initializeConnection() { const session = localStorage.getItem('neo4j.connection'); @@ -84,7 +83,7 @@ const PageLayout: React.FC = () => { const handleDisconnectButtonState = (isModalOpen: boolean) => { setShowDisconnectButton(isModalOpen); localStorage.setItem('disconnectButtonState', isModalOpen ? 
'true' : 'false'); - } + }; // To parse and set user credentials from session const setUserCredentialsFromSession = (neo4jConnection: string) => { if (!neo4jConnection) { @@ -110,14 +109,14 @@ const PageLayout: React.FC = () => { database: parsedConnection.database, }); setGdsActive(parsedConnection.isGDS); - //setIsReadOnlyUser(parsedConnection.isReadOnlyUser || false); + // setIsReadOnlyUser(parsedConnection.isReadOnlyUser || false); } else { console.error('Invalid parsed session data:', parsedConnection); } } catch (error) { console.error('Failed to parse session data:', error); } - } + }; // To update credentials if environment values differ const updateSessionIfNeeded = (envCredentials: UserCredentials, storedSession: string) => { try { @@ -148,43 +147,41 @@ const PageLayout: React.FC = () => { console.error('Failed to update session:', error); return false; } - } + }; try { // Handle case where session exists - if (session) { - if (isDev) { - let backendApiResponse; - try { - backendApiResponse = await envConnectionAPI(); - const connectionData = backendApiResponse.data; - const envCredentials = { - uri: connectionData.data.uri, - password: atob(connectionData.data.password), - userName: connectionData.data.user_name, - database: connectionData.data.database, - // isReadonlyUser: connectionData.data.write_access, - isGds: connectionData.data.gds_status, - }; - const updated = updateSessionIfNeeded(envCredentials, session); - if (!updated) { - setUserCredentialsFromSession(session); // Using stored session if no update is needed - } - setConnectionStatus(!!connectionData.data.graph_connection); - setIsBackendConnected(true); - handleDisconnectButtonState(false); - } catch (error) { - console.error('Error in DEV session handling:', error); - handleDisconnectButtonState(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setErrorMessage(backendApiResponse?.data.error); + if (session && isDev) { + let backendApiResponse; + try { + 
backendApiResponse = await envConnectionAPI(); + const connectionData = backendApiResponse.data; + const envCredentials = { + uri: connectionData.data.uri, + password: atob(connectionData.data.password), + userName: connectionData.data.user_name, + database: connectionData.data.database, + // isReadonlyUser: connectionData.data.write_access, + isGds: connectionData.data.gds_status, + }; + const updated = updateSessionIfNeeded(envCredentials, session); + if (!updated) { + setUserCredentialsFromSession(session); // Using stored session if no update is needed } - } else { + setConnectionStatus(Boolean(connectionData.data.graph_connection)); + setIsBackendConnected(true); + handleDisconnectButtonState(false); + } catch (error) { + console.error('Error in DEV session handling:', error); + handleDisconnectButtonState(true); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + setErrorMessage(backendApiResponse?.data.error); + } + }else{ // For PROD, picking the session values - setUserCredentialsFromSession(session); + setUserCredentialsFromSession(session as string); setConnectionStatus(true); setIsBackendConnected(true); handleDisconnectButtonState(true); - } return; } // Handle case where no session exists @@ -214,7 +211,7 @@ const PageLayout: React.FC = () => { isGDS: credentials.isGds, }) ); - setConnectionStatus(!!connectionData.graph_connection); + setConnectionStatus(Boolean(connectionData.graph_connection)); setIsBackendConnected(true); handleDisconnectButtonState(false); } catch (error) { From 08a97554e86e2ae2854cdc6f60b2d6ec3769d166 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:54:29 +0530 Subject: [PATCH 269/292] combining one chunk (#901) * combining one chunk * updated llm.py * updated llm.py --- backend/src/llm.py | 5 ++--- backend/src/shared/common_fn.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/src/llm.py b/backend/src/llm.py index 
bfb2a7abc..de50fa181 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -18,7 +18,6 @@ import google.auth from src.shared.constants import MODEL_VERSIONS, PROMPT_TO_ALL_LLMs - def get_llm(model: str): """Retrieve the specified language model based on the model name.""" env_key = "LLM_MODEL_CONFIG_" + model @@ -201,8 +200,8 @@ async def get_graph_document_list( async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowedRelationship): llm, model_name = get_llm(model) - #combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) - combined_chunk_document_list = get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list) + combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + #combined_chunk_document_list = get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list) if allowedNodes is None or allowedNodes=="": allowedNodes =[] diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index e6de7a583..916e01e89 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -60,7 +60,7 @@ def get_chunk_and_graphDocument(graph_document_list, chunkId_chunkDoc_list): logging.info("creating list of chunks and graph documents in get_chunk_and_graphDocument func") lst_chunk_chunkId_document=[] for graph_document in graph_document_list: - for chunk_id in graph_document.source.metadata['chunk_id'] : + for chunk_id in graph_document.source.metadata['combined_chunk_ids'] : lst_chunk_chunkId_document.append({'graph_doc':graph_document,'chunk_id':chunk_id}) return lst_chunk_chunkId_document From 3774a86875d07ed80dcffe5fb0ee6a8b1631cb6c Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Thu, 28 Nov 2024 12:43:55 +0530 Subject: [PATCH 270/292] Delete query refined to delete all related nodes of file (#904) --- backend/score.py | 2 +- backend/src/graphDB_dataAccess.py | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/backend/score.py 
b/backend/score.py index a64d22fca..02ca99238 100644 --- a/backend/score.py +++ b/backend/score.py @@ -317,7 +317,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database try: graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) - count_response = "" + count_response = [] start = time.time() if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index bab2bc57f..4c2e6435b 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -283,16 +283,18 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me return count(*) as deletedChunks """ query_to_delete_document_and_entities=""" - match (d:Document) where d.fileName IN $filename_list and d.fileSource in $source_types_list - detach delete d - with collect(d) as documents - unwind documents as d - match (d)<-[:PART_OF]-(c:Chunk) - detach delete c - with * - match (c)-[:HAS_ENTITY]->(e) - where not exists { (e)<-[:HAS_ENTITY]-()-[:PART_OF]->(d2) where not d2 in documents } - detach delete e + MATCH (d:Document) + WHERE d.fileName IN $filename_list AND d.fileSource IN $source_types_list + WITH COLLECT(d) as documents + UNWIND documents AS d + MATCH (d)<-[:PART_OF]-(c:Chunk) + WITH d, c, documents + OPTIONAL MATCH (c)-[:HAS_ENTITY]->(e) + WHERE NOT EXISTS { + MATCH (e)<-[:HAS_ENTITY]-(c2)-[:PART_OF]->(d2:Document) + WHERE NOT d2 IN documents + } + DETACH DELETE c, e, d """ query_to_delete_communities = """ MATCH (c:`__Community__`) @@ -301,9 +303,9 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me WITH * UNWIND range(1, $max_level) AS level - MATCH (c:`__Community__`) - WHERE c.level = level AND NOT EXISTS { (c)<-[:PARENT_COMMUNITY]-(child) } - DETACH DELETE c + MATCH (c1:`__Community__`) + WHERE c1.level = 
level AND NOT EXISTS { (c1)<-[:PARENT_COMMUNITY]-(child) } + DETACH DELETE c1 """ param = {"filename_list" : filename_list, "source_types_list": source_types_list} community_param = {"max_level":MAX_COMMUNITY_LEVELS} @@ -456,7 +458,7 @@ def update_node_relationship_count(self,document_name): if (not document_name) and (community_flag): result = self.execute_query(NODEREL_COUNT_QUERY_WITH_COMMUNITY) elif (not document_name) and (not community_flag): - return None + return [] else: param = {"document_name": document_name} result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param) From bf7b739a2efb371c00e515f4a0f1d1a296c5245f Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:04:43 +0000 Subject: [PATCH 271/292] readonly change --- frontend/src/components/Content.tsx | 33 +++++++++---------- frontend/src/components/Layout/PageLayout.tsx | 30 ++++++++--------- .../PostProcessingCheckList/index.tsx | 11 ++++--- frontend/src/context/UsersFiles.tsx | 17 ++++------ 4 files changed, 42 insertions(+), 49 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 0b27db6b6..764caca0f 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -128,7 +128,8 @@ const Content: React.FC = ({ (async () => { showNormalToast(); try { - const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + const payload = isGdsActive ? 
postProcessingTasks : postProcessingTasks.filter((task) => task !== 'enable_communities'); + const response = await postProcessing(userCredentials as UserCredentials, payload); if (response.data.status === 'Success') { const communityfiles = response.data?.data; if (Array.isArray(communityfiles) && communityfiles.length) { @@ -508,9 +509,8 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ - userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -520,10 +520,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -555,12 +555,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Ready to Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Ready to Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, + } : f; }); }); @@ -869,9 +869,8 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 764f32e78..ac528f898 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -29,7 +29,7 @@ const PageLayout: React.FC = () => { chunksExistsWithDifferentDimension: false, }); const largedesktops = useMediaQuery(`(min-width:1440px )`); - const { userCredentials, connectionStatus } = useCredentials(); + const { userCredentials, connectionStatus, setIsReadOnlyUser } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); @@ -96,11 +96,7 @@ const PageLayout: React.FC = () => { parsedConnection.uri && parsedConnection.user && parsedConnection.password && - parsedConnection.database && - typeof parsedConnection.uri === 'string' && - typeof parsedConnection.user === 'string' && - typeof parsedConnection.password === 'string' && - typeof parsedConnection.database === 'string' + parsedConnection.database ) { setUserCredentials({ uri: parsedConnection.uri, @@ -109,7 +105,7 @@ const PageLayout: React.FC = () => { database: parsedConnection.database, }); setGdsActive(parsedConnection.isGDS); - // setIsReadOnlyUser(parsedConnection.isReadOnlyUser || false); + setIsReadOnlyUser(parsedConnection.isReadOnlyUser); } else { console.error('Invalid parsed session data:', parsedConnection); } @@ -136,7 +132,7 @@ const PageLayout: React.FC = () => { password: btoa(envCredentials.password), database: envCredentials.database, userDbVectorIndex: 384, - // isReadOnlyUser: envCredentials.isReadonlyUser, + isReadOnlyUser: envCredentials.isReadonlyUser, isGDS: envCredentials.isGds, }) ); @@ -160,7 +156,7 @@ 
const PageLayout: React.FC = () => { password: atob(connectionData.data.password), userName: connectionData.data.user_name, database: connectionData.data.database, - // isReadonlyUser: connectionData.data.write_access, + isReadonlyUser: !connectionData.data.write_access, isGds: connectionData.data.gds_status, }; const updated = updateSessionIfNeeded(envCredentials, session); @@ -176,12 +172,12 @@ const PageLayout: React.FC = () => { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); setErrorMessage(backendApiResponse?.data.error); } - }else{ - // For PROD, picking the session values - setUserCredentialsFromSession(session as string); - setConnectionStatus(true); - setIsBackendConnected(true); - handleDisconnectButtonState(true); + } else { + // For PROD, picking the session values + setUserCredentialsFromSession(session as string); + setConnectionStatus(true); + setIsBackendConnected(true); + handleDisconnectButtonState(true); return; } // Handle case where no session exists @@ -195,7 +191,7 @@ const PageLayout: React.FC = () => { password: atob(connectionData.password), userName: connectionData.user_name, database: connectionData.database, - // isReadonlyUser: connectionData.write_access, + isReadonlyUser: !connectionData.write_access, isGds: connectionData.gds_status, }; setUserCredentials(credentials); @@ -207,7 +203,7 @@ const PageLayout: React.FC = () => { password: btoa(credentials.password), database: credentials.database, userDbVectorIndex: 384, - // isReadOnlyUser: credentials.isReadonlyUser, + isReadOnlyUser: credentials.isReadonlyUser, isGDS: credentials.isGds, }) ); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index d2b721995..d5ebc04a2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ 
b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -9,9 +9,6 @@ export default function PostProcessingCheckList() { const tablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const { postProcessingTasks, setPostProcessingTasks } = useFileContext(); const { isGdsActive } = useCredentials(); - const handleCheckboxChange = (jobTitle: string, isChecked: boolean) => { - setPostProcessingTasks((prev) => (isChecked ? [...prev, jobTitle] : prev.filter((task) => task !== jobTitle))); - }; return (
          @@ -40,8 +37,14 @@ export default function PostProcessingCheckList() { ? isGdsActive && postProcessingTasks.includes(job.title) : postProcessingTasks.includes(job.title) } + onChange={(e) => { + if (e.target.checked) { + setPostProcessingTasks((prev) => [...prev, job.title]); + } else { + setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); + } + }} isDisabled={isCreateCommunities && !isGdsActive} - onChange={(e) => handleCheckboxChange(job.title, e.target.checked)} ariaLabel='checkbox-postProcessing' /> {job.description} diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 92f4a4b77..4a58ca5dc 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -35,17 +35,12 @@ const FileContextProvider: FC = ({ children }) => { triggeredFrom: '', show: false, }); - const [postProcessingTasks, setPostProcessingTasks] = useState(() => { - const tasks = [ - 'materialize_text_chunk_similarities', - 'enable_hybrid_search_and_fulltext_search_in_bloom', - 'materialize_entity_similarities', - ]; - if (isGdsActive) { - tasks.push('enable_communities'); - } - return tasks; - }); + const [postProcessingTasks, setPostProcessingTasks] = useState([ + 'materialize_text_chunk_similarities', + 'enable_hybrid_search_and_fulltext_search_in_bloom', + 'materialize_entity_similarities', + 'enable_communities', + ]); const [processedCount, setProcessedCount] = useState(0); const [postProcessingVal, setPostProcessingVal] = useState(false); From f4e593ebd1d047b35d83ad732141eb009aba2d4b Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:38:48 +0530 Subject: [PATCH 272/292] Prod v6 fix (#909) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env 
updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added 
score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API 
* added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large 
file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for 
Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * 
wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal 
icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added 
the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and 
s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-auth… * Update FileTable.tsx * merge changes * Update README.md * Update README.md * Update README.md * Update README.md * Graph visualization improvement and chunk text details (#860) * Update README.md * Dev to staging (#859) * processing count updated on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> 
* Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the 
file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: 
Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * 
conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * for… * enable communities removed and delete queries updated * nginx security headers * removed parameter to include chunk nodes as entity source * Ready to reprocess * communities fix * dependency fix Reprocess fix * icon size fix * z index fix * icon fix * icon gap fix * upload status * Checkbox fix * fix: table issue chat bot clear issue * new document deletion, 2nd level community deletion handled * claer history * graph info and delete query update * format changes --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Jayanth T Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna --- backend/score.py | 1 - backend/src/graphDB_dataAccess.py | 41 +- frontend/Dockerfile | 2 +- frontend/nginx/nginx.prod.conf | 42 +- frontend/package.json | 4 +- frontend/src/App.css | 4 +- .../src/components/ChatBot/ChatInfoModal.tsx | 2 +- frontend/src/components/ChatBot/Chatbot.tsx | 12 +- .../components/ChatBot/CommunitiesInfo.tsx | 5 +- .../src/components/ChatBot/EntitiesInfo.tsx | 4 +- frontend/src/components/Content.tsx | 35 +- frontend/src/components/FileTable.tsx | 22 - .../components/Graph/GraphPropertiesTable.tsx | 2 +- frontend/src/components/Layout/Header.tsx | 2 +- frontend/src/components/Layout/PageLayout.tsx | 8 +- .../components/Popups/ChunkPopUp/index.tsx | 1 - .../Deduplication/index.tsx | 6 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- .../SelectedJobList.tsx | 4 +- .../Popups/RetryConfirmation/Index.tsx | 2 +- frontend/src/context/UsersFiles.tsx | 2 +- frontend/src/utils/Utils.ts | 2 +- frontend/yarn.lock | 1685 ++++++++--------- 23 files changed, 934 insertions(+), 956 deletions(-) diff --git a/backend/score.py b/backend/score.py index 02ca99238..0c2884c25 100644 --- a/backend/score.py +++ b/backend/score.py @@ -924,7 +924,6 @@ async def 
fetch_chunktext( page_no: int = Form(1) ): try: - start = time.time() result = await asyncio.to_thread( get_chunktext_results, diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 4c2e6435b..41a37631c 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -187,14 +187,11 @@ def check_gds_version(self): result = self.graph.query(gds_procedure_count) total_gds_procedures = result[0]['totalGdsProcedures'] if result else 0 - enable_communities = os.environ.get('ENABLE_COMMUNITIES','').upper() == "TRUE" - logging.info(f"Enable Communities {enable_communities}") - - if enable_communities and total_gds_procedures > 0: + if total_gds_procedures > 0: logging.info("GDS is available in the database.") return True else: - logging.info("Communities are disabled or GDS is not available in the database.") + logging.info("GDS is not available in the database.") return False except Exception as e: logging.error(f"An error occurred while checking GDS version: {e}") @@ -285,27 +282,31 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me query_to_delete_document_and_entities=""" MATCH (d:Document) WHERE d.fileName IN $filename_list AND d.fileSource IN $source_types_list - WITH COLLECT(d) as documents + WITH COLLECT(d) AS documents UNWIND documents AS d - MATCH (d)<-[:PART_OF]-(c:Chunk) - WITH d, c, documents - OPTIONAL MATCH (c)-[:HAS_ENTITY]->(e) + OPTIONAL MATCH (d)<-[:PART_OF]-(c:Chunk) + OPTIONAL MATCH (c:Chunk)-[:HAS_ENTITY]->(e) + WITH d, c, e, documents WHERE NOT EXISTS { MATCH (e)<-[:HAS_ENTITY]-(c2)-[:PART_OF]->(d2:Document) WHERE NOT d2 IN documents } - DETACH DELETE c, e, d - """ + WITH d, COLLECT(c) AS chunks, COLLECT(e) AS entities + FOREACH (chunk IN chunks | DETACH DELETE chunk) + FOREACH (entity IN entities | DETACH DELETE entity) + DETACH DELETE d + """ query_to_delete_communities = """ - MATCH (c:`__Community__`) - WHERE NOT EXISTS { ()-[:IN_COMMUNITY]->(c) } AND 
c.level = 0 - DETACH DELETE c - - WITH * - UNWIND range(1, $max_level) AS level - MATCH (c1:`__Community__`) - WHERE c1.level = level AND NOT EXISTS { (c1)<-[:PARENT_COMMUNITY]-(child) } - DETACH DELETE c1 + MATCH (c:`__Community__`) + WHERE c.level = 0 AND NOT EXISTS { ()-[:IN_COMMUNITY]->(c) } + DETACH DELETE c + WITH 1 AS dummy + UNWIND range(1, $max_level) AS level + CALL (level) { + MATCH (c:`__Community__`) + WHERE c.level = level AND NOT EXISTS { ()-[:PARENT_COMMUNITY]->(c) } + DETACH DELETE c + } """ param = {"filename_list" : filename_list, "source_types_list": source_types_list} community_param = {"max_level":MAX_COMMUNITY_LEVELS} diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 066563b30..5cbc3d8de 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -40,4 +40,4 @@ COPY --from=build /app/dist /usr/share/nginx/html COPY /nginx/nginx.${DEPLOYMENT_ENV}.conf /etc/nginx/templates/nginx.conf.template EXPOSE 8080 -CMD ["nginx", "-g", "daemon off;"] +CMD ["nginx", "-g", "daemon off;"] \ No newline at end of file diff --git a/frontend/nginx/nginx.prod.conf b/frontend/nginx/nginx.prod.conf index f58585714..d9c369326 100644 --- a/frontend/nginx/nginx.prod.conf +++ b/frontend/nginx/nginx.prod.conf @@ -1,22 +1,22 @@ -server { - listen 8080; - add_header X-Frame-Options "DENY"; - add_header X-Content-Type-Options "nosniff"; - add_header Content-Security-Policy "connect-src 'self' ${VITE_BACKEND_API_URL} ${VITE_SEGMENT_API_URL}; - frame-src 'self' *.youtube.com *.wikipedia.org; - script-src 'self' 'unsafe-inline' https://accounts.google.com/gsi/client; - default-src 'self' *.${VITE_FRONTEND_HOSTNAME} data:; - style-src 'self' *.googleapis.com 'unsafe-inline';" always ; - gzip on; - location / { - root /usr/share/nginx/html; - index index.html index.htm; - try_files $uri $uri/ /index.html; - } - - error_page 401 403 404 index.html; - - location /public { - root /usr/local/var/www; - } +server { + listen 8080; + add_header X-Frame-Options "DENY"; + 
add_header X-Content-Type-Options "nosniff"; + add_header Content-Security-Policy "connect-src 'self' ${VITE_BACKEND_API_URL} ${VITE_SEGMENT_API_URL}; + frame-src 'self' *.youtube.com *.wikipedia.org; + script-src 'self' 'unsafe-inline' https://accounts.google.com/gsi/client; + default-src 'self' *.${VITE_FRONTEND_HOSTNAME} data:; + style-src 'self' *.googleapis.com 'unsafe-inline';" always ; + gzip on; + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + error_page 401 403 404 index.html; + + location /public { + root /usr/local/var/www; + } } \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json index 3c869ad59..846621f49 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -15,8 +15,8 @@ "@mui/material": "^5.15.10", "@mui/styled-engine": "^5.15.9", "@neo4j-devtools/word-color": "^0.0.8", - "@neo4j-ndl/base": "^3.0.14", - "@neo4j-ndl/react": "^3.0.23", + "@neo4j-ndl/base": "^3.0.16", + "@neo4j-ndl/react": "^3.0.30", "@neo4j-nvl/base": "^0.3.6", "@neo4j-nvl/react": "^0.3.6", "@react-oauth/google": "^0.12.1", diff --git a/frontend/src/App.css b/frontend/src/App.css index 70127b6af..93eab4ae5 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -58,7 +58,7 @@ } .contentWithChatBot { - width: calc(-528px + 100dvw); + width: calc(-512px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; @@ -395,4 +395,4 @@ } .tbody-light .ndl-data-grid-tr:hover { --cell-background: rgb(226 227 229) !important; -} \ No newline at end of file +} diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 1609f1eac..68c07227d 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -321,7 +321,7 @@ const ChatInfoModal: React.FC = ({ [activeChatmodes] ); return ( -
          +
          = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg) + : msg ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg) + : msg ) ); } diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 30d533193..48a72b86c 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -61,7 +61,10 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) =
      ) : ( - No Communities Found + + {' '} + No Communities Found + )} {openGraphView && ( = ({ loading, mode, graphonly_entities, in })}
    ) : ( - No Entities Found + + No Entities Found + )} {openGraphView && ( = ({ setProcessedCount, setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( 'tableView' ); @@ -128,7 +127,9 @@ const Content: React.FC = ({ (async () => { showNormalToast(); try { - const payload = isGdsActive ? postProcessingTasks : postProcessingTasks.filter((task) => task !== 'enable_communities'); + const payload = isGdsActive + ? postProcessingTasks + : postProcessingTasks.filter((task) => task !== 'enable_communities'); const response = await postProcessing(userCredentials as UserCredentials, payload); if (response.data.status === 'Success') { const communityfiles = response.data?.data; @@ -509,8 +510,9 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ + userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -520,10 +522,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -555,12 +557,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? 
{ - ...f, - status: 'Ready to Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Ready to Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 0 : f.nodesCount, + relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount, + } : f; }); }); @@ -869,8 +871,9 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 53903d032..2cf1d7255 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -970,28 +970,6 @@ const FileTable = forwardRef((props, ref) => { }), [table] ); - useEffect(() => { - if (tableRef.current) { - // Component has content, calculate maximum height for table - // Observes the height of the content and calculates own height accordingly - const resizeObserver = new ResizeObserver((entries) => { - for (let index = 0; index < entries.length; index++) { - const entry = entries[index]; - const { height } = entry.contentRect; - const rowHeight = document?.getElementsByClassName('ndl-data-grid-td')?.[0]?.clientHeight ?? 69; - table.setPageSize(Math.floor(height / rowHeight)); - } - }); - - const [contentElement] = document.getElementsByClassName('ndl-data-grid-scrollable'); - resizeObserver.observe(contentElement); - - return () => { - // Stop observing content after cleanup - resizeObserver.unobserve(contentElement); - }; - } - }, []); const classNameCheck = isExpanded ? 
'fileTableWithExpansion' : `filetable`; diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index 753c200fb..fcabb0103 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -16,10 +16,10 @@ const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps
    {key} diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index b969401b3..2cd243b29 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -56,7 +56,7 @@ const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnecti const userName = neo4jConnection.user; const { password } = neo4jConnection; const { database } = neo4jConnection; - const [,port]= uri.split(':'); + const [, port] = uri.split(':'); const encodedPassword = btoa(password); const chatUrl = `/chat-only?uri=${encodeURIComponent( uri diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index ac528f898..dca910e43 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -38,7 +38,6 @@ const PageLayout: React.FC = () => { const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); @@ -92,12 +91,7 @@ const PageLayout: React.FC = () => { } try { const parsedConnection = JSON.parse(neo4jConnection); - if ( - parsedConnection.uri && - parsedConnection.user && - parsedConnection.password && - parsedConnection.database - ) { + if (parsedConnection.uri && parsedConnection.user && parsedConnection.password && parsedConnection.database) { setUserCredentials({ uri: parsedConnection.uri, userName: parsedConnection.user, diff --git a/frontend/src/components/Popups/ChunkPopUp/index.tsx b/frontend/src/components/Popups/ChunkPopUp/index.tsx index b8020c276..c0e2a016b 100644 --- a/frontend/src/components/Popups/ChunkPopUp/index.tsx +++ b/frontend/src/components/Popups/ChunkPopUp/index.tsx @@ -30,7 +30,6 @@ const ChunkPopUp = ({ const sortedChunksData = useMemo(() => { 
return chunks.sort((a, b) => a.position - b.position); }, [chunks]); - return ( diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 97c584b7c..df30c25c1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; @@ -143,7 +143,7 @@ export default function DeduplicationTab() { return (
    - (isGdsActive + isGdsActive ? postProcessingTasks.includes('enable_communities') ? postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + : postProcessingTasks.filter((s) => s != 'enable_communities'), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index 4ae9b18eb..c6345a956 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -71,7 +71,7 @@ function RetryConfirmationDialog({ = ({ children }) => { const selectedNodeLabelstr = localStorage.getItem('selectedNodeLabels'); const selectedNodeRelsstr = localStorage.getItem('selectedRelationshipLabels'); const persistedQueue = localStorage.getItem('waitingQueue'); - const { userCredentials, isGdsActive } = useCredentials(); + const { userCredentials } = useCredentials(); const [files, setFiles] = useState<(File | null)[] | []>([]); const [filesData, setFilesData] = useState([]); const [queue, setQueue] = useState( diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 7314c55e7..82217535d 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? 
batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 253a4e287..82c021a0e 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -505,9 +505,9 @@ tabbable "^6.0.0" "@floating-ui/react@^0.26.2": - version "0.26.27" - resolved "https://registry.yarnpkg.com/@floating-ui/react/-/react-0.26.27.tgz#402f7b4b2702650662705fe9cbe0f1d5607846a1" - integrity sha512-jLP72x0Kr2CgY6eTYi/ra3VA9LOkTo4C+DUTrbFgFOExKy3omYVmwMjNKqxAHdsnyLS96BIDLcO2SlnsNf8KUQ== + version "0.26.28" + resolved "https://registry.yarnpkg.com/@floating-ui/react/-/react-0.26.28.tgz#93f44ebaeb02409312e9df9507e83aab4a8c0dc7" + integrity sha512-yORQuuAtVpiRjpMhdc0wJj06b9JFjrYF4qp96j++v2NBpbi6SEGF7donUJ3TMieerQ6qVkAv1tgr7L4r5roTqw== dependencies: "@floating-ui/react-dom" "^2.1.2" "@floating-ui/utils" "^0.2.8" @@ -586,46 +586,33 @@ resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz#4a2868d75d6d6963e423bcf90b7fd1be343409d3" integrity sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA== -"@internationalized/date@^3.5.6": - version "3.5.6" - resolved "https://registry.yarnpkg.com/@internationalized/date/-/date-3.5.6.tgz#0833c2fa75efb3573f4e3bf10e3895f1019e87dd" - integrity sha512-jLxQjefH9VI5P9UQuqB6qNKnvFt1Ky1TPIzHGsIlCi7sZZoMR8SdYbBGRvM0y+Jtb+ez4ieBzmiAUcpmPYpyOw== +"@internationalized/date@^3.6.0": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@internationalized/date/-/date-3.6.0.tgz#b30d43030bfed1855f20c9503606926d75bfdf64" + integrity sha512-+z6ti+CcJnRlLHok/emGEsWQhe7kfSmEW+/6qCzvKY67YPh7YOBfvc7+/+NXq+zJlbArg30tYpqLjNgcAYv2YQ== dependencies: "@swc/helpers" "^0.5.0" -"@internationalized/message@^3.1.5": - version "3.1.5" - resolved 
"https://registry.yarnpkg.com/@internationalized/message/-/message-3.1.5.tgz#7391bba7a0489022a099f5bc37eb161889d520c8" - integrity sha512-hjEpLKFlYA3m5apldLqzHqw531qqfOEq0HlTWdfyZmcloWiUbWsYXD6YTiUmQmOtarthzhdjCAwMVrB8a4E7uA== +"@internationalized/message@^3.1.6": + version "3.1.6" + resolved "https://registry.yarnpkg.com/@internationalized/message/-/message-3.1.6.tgz#e5a832788a17214bfb3e5bbf5f0e23ed2f568ad7" + integrity sha512-JxbK3iAcTIeNr1p0WIFg/wQJjIzJt9l/2KNY/48vXV7GRGZSv3zMxJsce008fZclk2cDC8y0Ig3odceHO7EfNQ== dependencies: "@swc/helpers" "^0.5.0" intl-messageformat "^10.1.0" -"@internationalized/number@^3.5.4": - version "3.5.4" - resolved "https://registry.yarnpkg.com/@internationalized/number/-/number-3.5.4.tgz#db1c648fa191b28062c2f4fd81fac89777ad3e91" - integrity sha512-h9huwWjNqYyE2FXZZewWqmCdkw1HeFds5q4Siuoms3hUQC5iPJK3aBmkFZoDSLN4UD0Bl8G22L/NdHpeOr+/7A== +"@internationalized/number@^3.6.0": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@internationalized/number/-/number-3.6.0.tgz#dc6ba20c41b25eb605f1d5cac7d8668e9022c224" + integrity sha512-PtrRcJVy7nw++wn4W2OuePQQfTqDzfusSuY1QTtui4wa7r+rGVtR75pO8CyKvHvzyQYi3Q1uO5sY0AsB4e65Bw== dependencies: "@swc/helpers" "^0.5.0" -"@internationalized/string@^3.2.4": - version "3.2.4" - resolved "https://registry.yarnpkg.com/@internationalized/string/-/string-3.2.4.tgz#e05ddd93a7b83e936940c83f5b3a8597d8c3a370" - integrity sha512-BcyadXPn89Ae190QGZGDUZPqxLj/xsP4U1Br1oSy8yfIjmpJ8cJtGYleaodqW/EmzFjwELtwDojLkf3FhV6SjA== +"@internationalized/string@^3.2.4", "@internationalized/string@^3.2.5": + version "3.2.5" + resolved "https://registry.yarnpkg.com/@internationalized/string/-/string-3.2.5.tgz#2f387b256e79596a2e62ddd5e15c619fe241189c" + integrity sha512-rKs71Zvl2OKOHM+mzAFMIyqR5hI1d1O6BBkMK2/lkfg3fkmVh9Eeg0awcA8W2WqYqDOv6a86DIOlFpggwLtbuw== dependencies: - "@swc/helpers" "^0.5.0" - -"@isaacs/cliui@^8.0.2": - version "8.0.2" - resolved 
"https://registry.yarnpkg.com/@isaacs/cliui/-/cliui-8.0.2.tgz#b37667b7bc181c168782259bab42474fbf52b550" - integrity sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA== - dependencies: - string-width "^5.1.2" - string-width-cjs "npm:string-width@^4.2.0" - strip-ansi "^7.0.1" - strip-ansi-cjs "npm:strip-ansi@^6.0.1" - wrap-ansi "^8.1.0" - wrap-ansi-cjs "npm:wrap-ansi@^7.0.0" "@jridgewell/gen-mapping@^0.3.2", "@jridgewell/gen-mapping@^0.3.5": version "0.3.5" @@ -760,15 +747,15 @@ "@types/chroma-js" "2.1.4" chroma-js "2.4.2" -"@neo4j-ndl/base@^3.0.14": - version "3.0.14" - resolved "https://registry.yarnpkg.com/@neo4j-ndl/base/-/base-3.0.14.tgz#158db809cec3c986cf9170f8967f37cbf5740f61" - integrity sha512-GVRuibSXvhKsJMJH4J7ROQ1yyvNhvyiPBGewFJHR3fspxi/nKV4dh0jCspdSEdrtAhdhI5VGdwQlSDJREjEPzA== +"@neo4j-ndl/base@^3.0.16": + version "3.0.16" + resolved "https://registry.yarnpkg.com/@neo4j-ndl/base/-/base-3.0.16.tgz#8e20072c14f6249e901a2696ebdbd5ed60e63438" + integrity sha512-elmtzwdF4AxMHNhLnZJL4V3v6QH5Sv+6RZ1V4HY819w49f7cY3Sd1BYdA9/fu5ADrDOzY+fzPc5BUlIuYxbYaw== -"@neo4j-ndl/react@^3.0.23": - version "3.0.23" - resolved "https://registry.yarnpkg.com/@neo4j-ndl/react/-/react-3.0.23.tgz#07b1c24f5ca5adee630318b8a0a67419b5736a53" - integrity sha512-/szMXh/2ROpAVrNrNTOP4IK5eMWB5hRNMHXfu7NJOyqYAhWJlMkjZQClGEM7qvBAX6RoV8mmRWafquWMZhdnoA== +"@neo4j-ndl/react@^3.0.30": + version "3.0.30" + resolved "https://registry.yarnpkg.com/@neo4j-ndl/react/-/react-3.0.30.tgz#9476f95914876dbc7dc16a137ca53ea2b58ceb55" + integrity sha512-V4fwM56qM1JXeFOQfQLxvyYAaZm7uhmp9dfx2zSM41tIkS4gDTKlW4tIZG5bKQpE3LKAANpKjlQxsa14w14/5A== dependencies: "@dnd-kit/core" "6.1.0" "@dnd-kit/sortable" "8.0.0" @@ -778,8 +765,6 @@ "@table-nav/core" "0.0.7" "@table-nav/react" "0.0.7" classnames "2.5.1" - d3 "7.9.0" - d3-shape "3.2.0" date-fns "4.1.0" detect-browser "5.3.0" re-resizable "6.10.0" @@ -877,583 +862,596 @@ integrity 
sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A== "@react-aria/breadcrumbs@^3.5.17": - version "3.5.18" - resolved "https://registry.yarnpkg.com/@react-aria/breadcrumbs/-/breadcrumbs-3.5.18.tgz#72133bdb61744f538fba41f9e06864f980860e1b" - integrity sha512-JRc6nAwQsjqsPw/3MlGwJcVo9ACZDbCOwWNNEnj8mR0fQopJO5xliq3qVzxDRZjdYrVUfTTyKXuepv/jMB1Y6Q== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/link" "^3.7.6" - "@react-aria/utils" "^3.25.3" - "@react-types/breadcrumbs" "^3.7.8" - "@react-types/shared" "^3.25.0" + version "3.5.19" + resolved "https://registry.yarnpkg.com/@react-aria/breadcrumbs/-/breadcrumbs-3.5.19.tgz#e0a67e0e7017089fa0ee5eadd51a6da505b94cd4" + integrity sha512-mVngOPFYVVhec89rf/CiYQGTfaLRfHFtX+JQwY7sNYNqSA+gO8p4lNARe3Be6bJPgH+LUQuruIY9/ZDL6LT3HA== + dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/link" "^3.7.7" + "@react-aria/utils" "^3.26.0" + "@react-types/breadcrumbs" "^3.7.9" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/button@^3.10.0": - version "3.10.1" - resolved "https://registry.yarnpkg.com/@react-aria/button/-/button-3.10.1.tgz#348879949a612a90c056f69079edb1c1414f7ff1" - integrity sha512-1vkRsjdvJrJleK73u7ClrW4Fw3mtr2hIs8M2yLZUpLoqHXnIYJwmeEMtzwyPFYKBc5jaHcGXw45any7Puy1aFA== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/toggle" "^3.7.8" - "@react-types/button" "^3.10.0" - "@react-types/shared" "^3.25.0" + version "3.11.0" + resolved "https://registry.yarnpkg.com/@react-aria/button/-/button-3.11.0.tgz#cb7790db23949ec9c1e698fa531ee5471cf2b515" + integrity sha512-b37eIV6IW11KmNIAm65F3SEl2/mgj5BrHIysW6smZX3KoKWTGYsYfcQkmtNgY0GOSFfDxMCoolsZ6mxC00nSDA== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/toolbar" "3.0.0-beta.11" + "@react-aria/utils" "^3.26.0" + "@react-stately/toggle" "^3.8.0" + 
"@react-types/button" "^3.10.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/calendar@^3.5.12": - version "3.5.13" - resolved "https://registry.yarnpkg.com/@react-aria/calendar/-/calendar-3.5.13.tgz#7cf8787b521c8fc193550ce27acc114390502b16" - integrity sha512-BJV5IwIH4UPDa6/HRTOBcM1wC+/6p823VrbocV9mr+rt5cCnuh+cqcCQKqUSEbfaTMPrmabjBuEaQIvqjLRYUA== - dependencies: - "@internationalized/date" "^3.5.6" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/utils" "^3.25.3" - "@react-stately/calendar" "^3.5.5" - "@react-types/button" "^3.10.0" - "@react-types/calendar" "^3.4.10" - "@react-types/shared" "^3.25.0" + version "3.6.0" + resolved "https://registry.yarnpkg.com/@react-aria/calendar/-/calendar-3.6.0.tgz#d5e7cf4beb8724648a7042dbc5bb519de4351906" + integrity sha512-tZ3nd5DP8uxckbj83Pt+4RqgcTWDlGi7njzc7QqFOG2ApfnYDUXbIpb/Q4KY6JNlJskG8q33wo0XfOwNy8J+eg== + dependencies: + "@internationalized/date" "^3.6.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/utils" "^3.26.0" + "@react-stately/calendar" "^3.6.0" + "@react-types/button" "^3.10.1" + "@react-types/calendar" "^3.5.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/checkbox@^3.14.7": - version "3.14.8" - resolved "https://registry.yarnpkg.com/@react-aria/checkbox/-/checkbox-3.14.8.tgz#b4117f420e44d3a2693029453bb673d729c14f99" - integrity sha512-0qPJ3fiQQm7tiMHmIhR9iokr/MhhI2h6OWX/pDeIy/Gj63WSVk+Cka3NUhgMRGkguHKDZPKaFjK1oZQsXhCThQ== - dependencies: - "@react-aria/form" "^3.0.10" - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/toggle" "^3.10.9" - "@react-aria/utils" "^3.25.3" - "@react-stately/checkbox" "^3.6.9" - "@react-stately/form" "^3.0.6" - "@react-stately/toggle" "^3.7.8" - "@react-types/checkbox" "^3.8.4" - "@react-types/shared" "^3.25.0" + version "3.15.0" + resolved 
"https://registry.yarnpkg.com/@react-aria/checkbox/-/checkbox-3.15.0.tgz#4d224b71c65d6a079ff935ab22c806323f84b746" + integrity sha512-z/8xd4em7o0MroBXwkkwv7QRwiJaA1FwqMhRUb7iqtBGP2oSytBEDf0N7L09oci32a1P4ZPz2rMK5GlLh/PD6g== + dependencies: + "@react-aria/form" "^3.0.11" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/toggle" "^3.10.10" + "@react-aria/utils" "^3.26.0" + "@react-stately/checkbox" "^3.6.10" + "@react-stately/form" "^3.1.0" + "@react-stately/toggle" "^3.8.0" + "@react-types/checkbox" "^3.9.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/color@^3.0.0": - version "3.0.1" - resolved "https://registry.yarnpkg.com/@react-aria/color/-/color-3.0.1.tgz#3c78693a1a7a33ca421da9c637b696ec9dee601b" - integrity sha512-7hTCdXCU2/qpZuIrJcVr+s87C2MqHfi9Y461gMza5DjdUzlcy480UZ/iknbw82C0a+oVo08D/bnQctEjja05pw== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/numberfield" "^3.11.8" - "@react-aria/slider" "^3.7.13" - "@react-aria/spinbutton" "^3.6.9" - "@react-aria/textfield" "^3.14.10" - "@react-aria/utils" "^3.25.3" - "@react-aria/visually-hidden" "^3.8.17" - "@react-stately/color" "^3.8.0" - "@react-stately/form" "^3.0.6" - "@react-types/color" "^3.0.0" - "@react-types/shared" "^3.25.0" + version "3.0.2" + resolved "https://registry.yarnpkg.com/@react-aria/color/-/color-3.0.2.tgz#3abeb7e9fa9756e1823e513921e04dcaa47b25cc" + integrity sha512-dSM5qQRcR1gRGYCBw0IGRmc29gjfoht3cQleKb8MMNcgHYa2oi5VdCs2yKXmYFwwVC6uPtnlNy9S6e0spqdr+w== + dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/numberfield" "^3.11.9" + "@react-aria/slider" "^3.7.14" + "@react-aria/spinbutton" "^3.6.10" + "@react-aria/textfield" "^3.15.0" + "@react-aria/utils" "^3.26.0" + "@react-aria/visually-hidden" "^3.8.18" + "@react-stately/color" "^3.8.1" + "@react-stately/form" "^3.1.0" + "@react-types/color" "^3.0.1" + "@react-types/shared" "^3.26.0" 
"@swc/helpers" "^0.5.0" "@react-aria/combobox@^3.10.4": - version "3.10.5" - resolved "https://registry.yarnpkg.com/@react-aria/combobox/-/combobox-3.10.5.tgz#2530829da5e3bdae1ab3f42288f85ce683654830" - integrity sha512-1cjBJXWYuR0de+9IEU1MOer3H5FSlbrdaqlWo+M6vvMymBL2OjjwXiG3LY1mR65ZwHoTswXzt6/mujUKaxk5vw== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/listbox" "^3.13.5" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/menu" "^3.15.5" - "@react-aria/overlays" "^3.23.4" - "@react-aria/selection" "^3.20.1" - "@react-aria/textfield" "^3.14.10" - "@react-aria/utils" "^3.25.3" - "@react-stately/collections" "^3.11.0" - "@react-stately/combobox" "^3.10.0" - "@react-stately/form" "^3.0.6" - "@react-types/button" "^3.10.0" - "@react-types/combobox" "^3.13.0" - "@react-types/shared" "^3.25.0" + version "3.11.0" + resolved "https://registry.yarnpkg.com/@react-aria/combobox/-/combobox-3.11.0.tgz#9489aaad342d092bf1fe1c4c382f6714316ac1c4" + integrity sha512-s88YMmPkMO1WSoiH1KIyZDLJqUwvM2wHXXakj3cYw1tBHGo4rOUFq+JWQIbM5EDO4HOR4AUUqzIUd0NO7t3zyg== + dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/listbox" "^3.13.6" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/menu" "^3.16.0" + "@react-aria/overlays" "^3.24.0" + "@react-aria/selection" "^3.21.0" + "@react-aria/textfield" "^3.15.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/combobox" "^3.10.1" + "@react-stately/form" "^3.1.0" + "@react-types/button" "^3.10.1" + "@react-types/combobox" "^3.13.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/datepicker@^3.11.3": - version "3.11.4" - resolved "https://registry.yarnpkg.com/@react-aria/datepicker/-/datepicker-3.11.4.tgz#f0a927e7958cd341de8d4ccd78cf28ec69ecbdf6" - integrity sha512-TXe1TB/pSwrIQ5BIDr6NCAYjBaKgLN6cP5DlAihywHzqxbM6vO8GU6qbrZNSBrtfzZnrR/4z66Vlw6rhznLnqQ== - dependencies: - "@internationalized/date" "^3.5.6" - "@internationalized/number" "^3.5.4" - 
"@internationalized/string" "^3.2.4" - "@react-aria/focus" "^3.18.4" - "@react-aria/form" "^3.0.10" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/spinbutton" "^3.6.9" - "@react-aria/utils" "^3.25.3" - "@react-stately/datepicker" "^3.10.3" - "@react-stately/form" "^3.0.6" - "@react-types/button" "^3.10.0" - "@react-types/calendar" "^3.4.10" - "@react-types/datepicker" "^3.8.3" - "@react-types/dialog" "^3.5.13" - "@react-types/shared" "^3.25.0" + version "3.12.0" + resolved "https://registry.yarnpkg.com/@react-aria/datepicker/-/datepicker-3.12.0.tgz#a82ff3ebd3ead20a00096d082c1e6be47bbd5886" + integrity sha512-VYNXioLfddIHpwQx211+rTYuunDmI7VHWBRetCpH3loIsVFuhFSRchTQpclAzxolO3g0vO7pMVj9VYt7Swp6kg== + dependencies: + "@internationalized/date" "^3.6.0" + "@internationalized/number" "^3.6.0" + "@internationalized/string" "^3.2.5" + "@react-aria/focus" "^3.19.0" + "@react-aria/form" "^3.0.11" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/spinbutton" "^3.6.10" + "@react-aria/utils" "^3.26.0" + "@react-stately/datepicker" "^3.11.0" + "@react-stately/form" "^3.1.0" + "@react-types/button" "^3.10.1" + "@react-types/calendar" "^3.5.0" + "@react-types/datepicker" "^3.9.0" + "@react-types/dialog" "^3.5.14" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/dialog@^3.5.18": - version "3.5.19" - resolved "https://registry.yarnpkg.com/@react-aria/dialog/-/dialog-3.5.19.tgz#568b937e8f63078dabcccbc291ef0cef044aa2d0" - integrity sha512-I3AJWpAWCajj8Ama8qLQ18Tc37ODyk+Ym3haYEl5L4QnuFc0dU1sMJr15fppDGIxYjwvTTfctyhaSCz+S+wpkw== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/overlays" "^3.23.4" - "@react-aria/utils" "^3.25.3" - "@react-types/dialog" "^3.5.13" - "@react-types/shared" "^3.25.0" + version "3.5.20" + resolved 
"https://registry.yarnpkg.com/@react-aria/dialog/-/dialog-3.5.20.tgz#6404d2c1bab1ea9ecce3ebc7adce64733ecea985" + integrity sha512-l0GZVLgeOd3kL3Yj8xQW7wN3gn9WW3RLd/SGI9t7ciTq+I/FhftjXCWzXLlOCCTLMf+gv7eazecECtmoWUaZWQ== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/overlays" "^3.24.0" + "@react-aria/utils" "^3.26.0" + "@react-types/dialog" "^3.5.14" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/dnd@^3.7.3": - version "3.7.4" - resolved "https://registry.yarnpkg.com/@react-aria/dnd/-/dnd-3.7.4.tgz#3e30f94569a39b381ebf353bb3d22e2a843fbd6f" - integrity sha512-lRE8SVyK/MPbF6NiVXHoriOV0QulNKkSndyDr3TWPsLhH5GKQso5jSx8/5ogbDgRTzIsmIQldj/HlW238DCiSg== - dependencies: - "@internationalized/string" "^3.2.4" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/overlays" "^3.23.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/dnd" "^3.4.3" - "@react-types/button" "^3.10.0" - "@react-types/shared" "^3.25.0" + version "3.8.0" + resolved "https://registry.yarnpkg.com/@react-aria/dnd/-/dnd-3.8.0.tgz#7e3bfa3f509efeb9a872686e65641a719684e95a" + integrity sha512-JiqHY3E9fDU5Kb4gN22cuK6QNlpMCGe6ngR/BV+Q8mLEsdoWcoUAYOtYXVNNTRvCdVbEWI87FUU+ThyPpoDhNQ== + dependencies: + "@internationalized/string" "^3.2.5" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/overlays" "^3.24.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/dnd" "^3.5.0" + "@react-types/button" "^3.10.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/focus@^3.18.3", "@react-aria/focus@^3.18.4": - version "3.18.4" - resolved "https://registry.yarnpkg.com/@react-aria/focus/-/focus-3.18.4.tgz#a6e95896bc8680d1b5bcd855e983fc2c195a1a55" - integrity sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA== +"@react-aria/focus@^3.18.3", "@react-aria/focus@^3.19.0": + version 
"3.19.0" + resolved "https://registry.yarnpkg.com/@react-aria/focus/-/focus-3.19.0.tgz#82b9a5b83f023b943a7970df3d059f49d61df05d" + integrity sha512-hPF9EXoUQeQl1Y21/rbV2H4FdUR2v+4/I0/vB+8U3bT1CJ+1AFj1hc/rqx2DqEwDlEwOHN+E4+mRahQmlybq0A== dependencies: - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" clsx "^2.0.0" -"@react-aria/form@^3.0.10": - version "3.0.10" - resolved "https://registry.yarnpkg.com/@react-aria/form/-/form-3.0.10.tgz#0d21bd33aac4153fcbfdd87cc04fce9f8e148650" - integrity sha512-hWBrqEXxBxcpYTJv0telQKaiu2728EUFHta8/RGBqJ4+MhKKxI7+PnLoms78IuiK0MCYvukHfun1fuQvK+8jsg== +"@react-aria/form@^3.0.11": + version "3.0.11" + resolved "https://registry.yarnpkg.com/@react-aria/form/-/form-3.0.11.tgz#84511874e1fad5f981bae97ebd4d549923849455" + integrity sha512-oXzjTiwVuuWjZ8muU0hp3BrDH5qjVctLOF50mjPvqUbvXQTHhoDxWweyIXPQjGshaqBd2w4pWaE4A2rG2O/apw== dependencies: - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/form" "^3.0.6" - "@react-types/shared" "^3.25.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-stately/form" "^3.1.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/grid@^3.10.5": - version "3.10.5" - resolved "https://registry.yarnpkg.com/@react-aria/grid/-/grid-3.10.5.tgz#34caf94aa2442949e75a825684f6b7bea0b8af43" - integrity sha512-9sLa+rpLgRZk7VX+tvdSudn1tdVgolVzhDLGWd95yS4UtPVMihTMGBrRoByY57Wxvh1V+7Ptw8kc6tsRSotYKg== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/collections" "^3.11.0" - "@react-stately/grid" "^3.9.3" - "@react-stately/selection" "^3.17.0" - "@react-types/checkbox" 
"^3.8.4" - "@react-types/grid" "^3.2.9" - "@react-types/shared" "^3.25.0" +"@react-aria/grid@^3.11.0": + version "3.11.0" + resolved "https://registry.yarnpkg.com/@react-aria/grid/-/grid-3.11.0.tgz#5ad6596745482e109b3b47f1fec7ce372f632707" + integrity sha512-lN5FpQgu2Rq0CzTPWmzRpq6QHcMmzsXYeClsgO3108uVp1/genBNAObYVTxGOKe/jb9q99trz8EtIn05O6KN1g== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/grid" "^3.10.0" + "@react-stately/selection" "^3.18.0" + "@react-types/checkbox" "^3.9.0" + "@react-types/grid" "^3.2.10" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/gridlist@^3.9.4", "@react-aria/gridlist@^3.9.5": - version "3.9.5" - resolved "https://registry.yarnpkg.com/@react-aria/gridlist/-/gridlist-3.9.5.tgz#a1e6dbe7e14be9203630c76bfb73b55cd4d958c4" - integrity sha512-LM+3D0amZZ1qiyqWVG52j0YRWt2chdpx+WG80ryDKwHLDIq7uz1+KXyIfv8cFt/cZcl6+9Ft3kWALCAi6O4NLA== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/grid" "^3.10.5" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/collections" "^3.11.0" - "@react-stately/list" "^3.11.0" - "@react-stately/tree" "^3.8.5" - "@react-types/shared" "^3.25.0" +"@react-aria/gridlist@^3.10.0", "@react-aria/gridlist@^3.9.4": + version "3.10.0" + resolved "https://registry.yarnpkg.com/@react-aria/gridlist/-/gridlist-3.10.0.tgz#adc2f9896a2759bb29cec428378c8ac85235a110" + integrity sha512-UcblfSZ7kJBrjg9mQ5VbnRevN81UiYB4NuL5PwIpBpridO7tnl4ew6+96PYU7Wj1chHhPS3x0b0zmuSVN7A0LA== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/grid" "^3.11.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/selection" "^3.21.0" + 
"@react-aria/utils" "^3.26.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/list" "^3.11.1" + "@react-stately/tree" "^3.8.6" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/i18n@^3.12.3": - version "3.12.3" - resolved "https://registry.yarnpkg.com/@react-aria/i18n/-/i18n-3.12.3.tgz#ec902787ea840755a1e7b4feb64435b8451baf62" - integrity sha512-0Tp/4JwnCVNKDfuknPF+/xf3/woOc8gUjTU2nCjO3mCVb4FU7KFtjxQ2rrx+6hpIVG6g+N9qfMjRa/ggVH0CJg== - dependencies: - "@internationalized/date" "^3.5.6" - "@internationalized/message" "^3.1.5" - "@internationalized/number" "^3.5.4" - "@internationalized/string" "^3.2.4" - "@react-aria/ssr" "^3.9.6" - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" +"@react-aria/i18n@^3.12.3", "@react-aria/i18n@^3.12.4": + version "3.12.4" + resolved "https://registry.yarnpkg.com/@react-aria/i18n/-/i18n-3.12.4.tgz#4520ce48a1b6ebe4aa470d72eba300e65de01814" + integrity sha512-j9+UL3q0Ls8MhXV9gtnKlyozq4aM95YywXqnmJtzT1rYeBx7w28hooqrWkCYLfqr4OIryv1KUnPiCSLwC2OC7w== + dependencies: + "@internationalized/date" "^3.6.0" + "@internationalized/message" "^3.1.6" + "@internationalized/number" "^3.6.0" + "@internationalized/string" "^3.2.5" + "@react-aria/ssr" "^3.9.7" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/interactions@^3.22.3", "@react-aria/interactions@^3.22.4": - version "3.22.4" - resolved "https://registry.yarnpkg.com/@react-aria/interactions/-/interactions-3.22.4.tgz#88ed61ab6a485f869bc1f65ae6688d48ca96064b" - integrity sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww== +"@react-aria/interactions@^3.22.3", "@react-aria/interactions@^3.22.5": + version "3.22.5" + resolved "https://registry.yarnpkg.com/@react-aria/interactions/-/interactions-3.22.5.tgz#9cd8c93b8b6988f1d315d3efb450119d1432bbb8" + integrity sha512-kMwiAD9E0TQp+XNnOs13yVJghiy8ET8L0cbkeuTgNI96sOAp/63EJ1FSrDf17iD8sdjt41LafwX/dKXW9nCcLQ== 
dependencies: - "@react-aria/ssr" "^3.9.6" - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" + "@react-aria/ssr" "^3.9.7" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/label@^3.7.12": - version "3.7.12" - resolved "https://registry.yarnpkg.com/@react-aria/label/-/label-3.7.12.tgz#d6aa0dca5d0ba280fd6f15c1f05327095a2526c5" - integrity sha512-u9xT90lAlgb7xiv+p0md9QwCHz65XL7tjS5e29e88Rs3ptkv3aQubTqxVOUTEwzbNUT4A1QqTjUm1yfHewIRUw== +"@react-aria/label@^3.7.12", "@react-aria/label@^3.7.13": + version "3.7.13" + resolved "https://registry.yarnpkg.com/@react-aria/label/-/label-3.7.13.tgz#9e7153a1ded878b5147d141effc3eb226f3c6c1f" + integrity sha512-brSAXZVTey5RG/Ex6mTrV/9IhGSQFU4Al34qmjEDho+Z2qT4oPwf8k7TRXWWqzOU0ugYxekYbsLd2zlN3XvWcg== dependencies: - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/link@^3.7.5", "@react-aria/link@^3.7.6": - version "3.7.6" - resolved "https://registry.yarnpkg.com/@react-aria/link/-/link-3.7.6.tgz#d71e9f1c16b671f017b69f078887432ffc65ac65" - integrity sha512-8buJznRWoOud8ApygUAz7TsshXNs6HDGB6YOYEJxy0WTKILn0U5NUymw2PWC14+bWRPelHMKmi6vbFBrJWzSzQ== +"@react-aria/link@^3.7.5", "@react-aria/link@^3.7.7": + version "3.7.7" + resolved "https://registry.yarnpkg.com/@react-aria/link/-/link-3.7.7.tgz#5879c75068b63d55353b3e96b4fda0fa8753d1ad" + integrity sha512-eVBRcHKhNSsATYWv5wRnZXRqPVcKAWWakyvfrYePIKpC3s4BaHZyTGYdefk8ZwZdEOuQZBqLMnjW80q1uhtkuA== dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-types/link" "^3.5.8" - "@react-types/shared" "^3.25.0" + "@react-aria/focus" "^3.19.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-types/link" "^3.5.9" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/listbox@^3.13.4", 
"@react-aria/listbox@^3.13.5": - version "3.13.5" - resolved "https://registry.yarnpkg.com/@react-aria/listbox/-/listbox-3.13.5.tgz#67b628c5fc9c5dbee327a012ae82d05ebc60b75d" - integrity sha512-tn32L/PIELIPYfDWCJ3OBRvvb/jCEvIzs6IYs8xCISV5W4853Je/WnA8wumWnz07U9sODYFmHUx2ThO7Z7dH7Q== - dependencies: - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/collections" "^3.11.0" - "@react-stately/list" "^3.11.0" - "@react-types/listbox" "^3.5.2" - "@react-types/shared" "^3.25.0" +"@react-aria/listbox@^3.13.4", "@react-aria/listbox@^3.13.6": + version "3.13.6" + resolved "https://registry.yarnpkg.com/@react-aria/listbox/-/listbox-3.13.6.tgz#43ff24f4a6540a9952729833201460fa6ab081f7" + integrity sha512-6hEXEXIZVau9lgBZ4VVjFR3JnGU+fJaPmV3HP0UZ2ucUptfG0MZo24cn+ZQJsWiuaCfNFv5b8qribiv+BcO+Kg== + dependencies: + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/list" "^3.11.1" + "@react-types/listbox" "^3.5.3" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/live-announcer@^3.4.0": - version "3.4.0" - resolved "https://registry.yarnpkg.com/@react-aria/live-announcer/-/live-announcer-3.4.0.tgz#0ad90fddc4731e93071d802c8cec9e1dfd2fc448" - integrity sha512-VBxEdMq2SbtRbNTQNcDR2G6E3lEl5cJSBiHTTO8Ln1AL76LiazrylIXGgoktqzCfRQmyq0v8CHk1cNKDU9mvJg== +"@react-aria/live-announcer@^3.4.1": + version "3.4.1" + resolved "https://registry.yarnpkg.com/@react-aria/live-announcer/-/live-announcer-3.4.1.tgz#efedf706b23f6e1b526a3a35c14c202ac3e68487" + integrity sha512-4X2mcxgqLvvkqxv2l1n00jTzUxxe0kkLiapBGH1LHX/CxA1oQcHDqv8etJ2ZOwmS/MSBBiWnv3DwYHDOF6ubig== dependencies: "@swc/helpers" "^0.5.0" -"@react-aria/menu@^3.15.4", "@react-aria/menu@^3.15.5": - version "3.15.5" - resolved 
"https://registry.yarnpkg.com/@react-aria/menu/-/menu-3.15.5.tgz#b133a0d60da94dfbbd1bfe3db485cf91ccfff900" - integrity sha512-ygfS032hJSZCYYbMHnUSmUTVMaz99L9AUZ9kMa6g+k2X1t92K1gXfhYYkoClQD6+G0ch7zm0SwYFlUmRf9yOEA== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/overlays" "^3.23.4" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/collections" "^3.11.0" - "@react-stately/menu" "^3.8.3" - "@react-stately/tree" "^3.8.5" - "@react-types/button" "^3.10.0" - "@react-types/menu" "^3.9.12" - "@react-types/shared" "^3.25.0" +"@react-aria/menu@^3.15.4", "@react-aria/menu@^3.16.0": + version "3.16.0" + resolved "https://registry.yarnpkg.com/@react-aria/menu/-/menu-3.16.0.tgz#119e562806e9f8a39fd468ab790d788905c6df83" + integrity sha512-TNk+Vd3TbpBPUxEloAdHRTaRxf9JBK7YmkHYiq0Yj5Lc22KS0E2eTyhpPM9xJvEWN2TlC5TEvNfdyui2kYWFFQ== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/overlays" "^3.24.0" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/menu" "^3.9.0" + "@react-stately/selection" "^3.18.0" + "@react-stately/tree" "^3.8.6" + "@react-types/button" "^3.10.1" + "@react-types/menu" "^3.9.13" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/meter@^3.4.17": - version "3.4.17" - resolved "https://registry.yarnpkg.com/@react-aria/meter/-/meter-3.4.17.tgz#ace0323e9d74c1967b4a700db744161aff366084" - integrity sha512-08wbQhfvVWzpWilhn/WD7cQ7TqafS/66umTk7+X6BW6TrS1//6loNNJV62IC3F7sskel4iEAtl2gW0WpW8zEdg== + version "3.4.18" + resolved "https://registry.yarnpkg.com/@react-aria/meter/-/meter-3.4.18.tgz#ff3f85f32ea30285e7e73386a641efdcedd88205" + integrity sha512-tTX3LLlmDIHqrC42dkdf+upb1c4UbhlpZ52gqB64lZD4OD4HE+vMTwNSe+7MRKMLvcdKPWCRC35PnxIHZ15kfQ== dependencies: - "@react-aria/progress" 
"^3.4.17" - "@react-types/meter" "^3.4.4" - "@react-types/shared" "^3.25.0" + "@react-aria/progress" "^3.4.18" + "@react-types/meter" "^3.4.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/numberfield@^3.11.7", "@react-aria/numberfield@^3.11.8": - version "3.11.8" - resolved "https://registry.yarnpkg.com/@react-aria/numberfield/-/numberfield-3.11.8.tgz#d1682305df3cc64ce3a66f9ee997eb778a1aa077" - integrity sha512-CWRHbrjfpvEqBmtjwX8LjVds6+tMNneRlKF46ked5sZilfU2jIirufaucM36N4vX6N/W7nFR/rCbp2WCOU9p3Q== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/spinbutton" "^3.6.9" - "@react-aria/textfield" "^3.14.10" - "@react-aria/utils" "^3.25.3" - "@react-stately/form" "^3.0.6" - "@react-stately/numberfield" "^3.9.7" - "@react-types/button" "^3.10.0" - "@react-types/numberfield" "^3.8.6" - "@react-types/shared" "^3.25.0" +"@react-aria/numberfield@^3.11.7", "@react-aria/numberfield@^3.11.9": + version "3.11.9" + resolved "https://registry.yarnpkg.com/@react-aria/numberfield/-/numberfield-3.11.9.tgz#175f801b18740534dca023cfd9ce0349eff940b0" + integrity sha512-3tiGPx2y4zyOV7PmdBASes99ZZsFTZAJTnU45Z+p1CW4131lw7y2ZhbojBl7U6DaXAJvi1z6zY6cq2UE9w5a0Q== + dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/spinbutton" "^3.6.10" + "@react-aria/textfield" "^3.15.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/form" "^3.1.0" + "@react-stately/numberfield" "^3.9.8" + "@react-types/button" "^3.10.1" + "@react-types/numberfield" "^3.8.7" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/overlays@^3.23.3", "@react-aria/overlays@^3.23.4": - version "3.23.4" - resolved "https://registry.yarnpkg.com/@react-aria/overlays/-/overlays-3.23.4.tgz#8fc2f7f5884f514056651490a17b9fd40e519df1" - integrity sha512-MZUW6SUlTWOwKuFTqUTxW5BnvdW3Y9cEwanWuz98NX3ST7JYe/3ZcZhb37/fGW4uoGHnQ9icEwVf0rbMrK2STg== - dependencies: - "@react-aria/focus" "^3.18.4" - 
"@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/ssr" "^3.9.6" - "@react-aria/utils" "^3.25.3" - "@react-aria/visually-hidden" "^3.8.17" - "@react-stately/overlays" "^3.6.11" - "@react-types/button" "^3.10.0" - "@react-types/overlays" "^3.8.10" - "@react-types/shared" "^3.25.0" +"@react-aria/overlays@^3.23.3", "@react-aria/overlays@^3.24.0": + version "3.24.0" + resolved "https://registry.yarnpkg.com/@react-aria/overlays/-/overlays-3.24.0.tgz#7f97cd12506961abfab3ae653822cea05d1cacd3" + integrity sha512-0kAXBsMNTc/a3M07tK9Cdt/ea8CxTAEJ223g8YgqImlmoBBYAL7dl5G01IOj67TM64uWPTmZrOklBchHWgEm3A== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/ssr" "^3.9.7" + "@react-aria/utils" "^3.26.0" + "@react-aria/visually-hidden" "^3.8.18" + "@react-stately/overlays" "^3.6.12" + "@react-types/button" "^3.10.1" + "@react-types/overlays" "^3.8.11" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/progress@^3.4.17": - version "3.4.17" - resolved "https://registry.yarnpkg.com/@react-aria/progress/-/progress-3.4.17.tgz#19b7ce3577049af679fba415e2ba177c68c3a125" - integrity sha512-5+01WNibLoNS5KcfU5p6vg7Lhz17plqqzv/uITx28zzj3saaj0VLR7n57Ig2fXe8ZEQoUS89BS3sIEsIf96S1A== +"@react-aria/progress@^3.4.17", "@react-aria/progress@^3.4.18": + version "3.4.18" + resolved "https://registry.yarnpkg.com/@react-aria/progress/-/progress-3.4.18.tgz#948859ce1b0e13d935da7d4cbe6812d451472fe4" + integrity sha512-FOLgJ9t9i1u3oAAimybJG6r7/soNPBnJfWo4Yr6MmaUv90qVGa1h6kiuM5m9H/bm5JobAebhdfHit9lFlgsCmg== dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/label" "^3.7.12" - "@react-aria/utils" "^3.25.3" - "@react-types/progress" "^3.5.7" - "@react-types/shared" "^3.25.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/label" "^3.7.13" + "@react-aria/utils" "^3.26.0" + "@react-types/progress" "^3.5.8" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" 
"@react-aria/radio@^3.10.8": - version "3.10.9" - resolved "https://registry.yarnpkg.com/@react-aria/radio/-/radio-3.10.9.tgz#bca231f4f6136b6272bf1ee7701c22180553af4f" - integrity sha512-XnU7zGTEku1mPvJweX4I3ifwEBtglEWYoO4CZGvA3eXj39X8iGwNZXUst1pdk2ykWUKbtwrmsWA6zG2OAGODYw== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/form" "^3.0.10" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/utils" "^3.25.3" - "@react-stately/radio" "^3.10.8" - "@react-types/radio" "^3.8.4" - "@react-types/shared" "^3.25.0" + version "3.10.10" + resolved "https://registry.yarnpkg.com/@react-aria/radio/-/radio-3.10.10.tgz#18e2811fb3e72298414c880bd9405ea3f1d83f1f" + integrity sha512-NVdeOVrsrHgSfwL2jWCCXFsWZb+RMRZErj5vthHQW4nkHECGOzeX56VaLWTSvdoCPqi9wdIX8A6K9peeAIgxzA== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/form" "^3.0.11" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/utils" "^3.26.0" + "@react-stately/radio" "^3.10.9" + "@react-types/radio" "^3.8.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/searchfield@^3.7.9": - version "3.7.10" - resolved "https://registry.yarnpkg.com/@react-aria/searchfield/-/searchfield-3.7.10.tgz#a8997321e0e6aa8df8f36b39f4ab927eb744aaf0" - integrity sha512-1XTYh2dycedaK1tgpHAHcu8PTK1wG3dv53yLziu07JsBe9tX6O8jIFBhZK8SpfNnP8pEOI3PIlVEjaarLwgWzQ== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/textfield" "^3.14.10" - "@react-aria/utils" "^3.25.3" - "@react-stately/searchfield" "^3.5.7" - "@react-types/button" "^3.10.0" - "@react-types/searchfield" "^3.5.9" - "@react-types/shared" "^3.25.0" + version "3.7.11" + resolved "https://registry.yarnpkg.com/@react-aria/searchfield/-/searchfield-3.7.11.tgz#2be234280cc4fb58a316db6ba1f95ea34754c043" + integrity sha512-wFf6QxtBFfoxy0ANxI0+ftFEBGynVCY0+ce4H4Y9LpUTQsIKMp3sdc7LoUFORWw5Yee6Eid5cFPQX0Ymnk+ZJg== + 
dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/textfield" "^3.15.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/searchfield" "^3.5.8" + "@react-types/button" "^3.10.1" + "@react-types/searchfield" "^3.5.10" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/select@^3.14.10": - version "3.14.11" - resolved "https://registry.yarnpkg.com/@react-aria/select/-/select-3.14.11.tgz#705d51c7b39bedcb1dfb0f59280f2aa4ae747584" - integrity sha512-rX5U4JcPNV41lNEF1tAxNxqrGENnLGZL/D5Y+YNpqKSU5U09+hD3ovsflNkF/d+deb25zg45JRxumwOCQ+rfyw== - dependencies: - "@react-aria/form" "^3.0.10" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/listbox" "^3.13.5" - "@react-aria/menu" "^3.15.5" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-aria/visually-hidden" "^3.8.17" - "@react-stately/select" "^3.6.8" - "@react-types/button" "^3.10.0" - "@react-types/select" "^3.9.7" - "@react-types/shared" "^3.25.0" + version "3.15.0" + resolved "https://registry.yarnpkg.com/@react-aria/select/-/select-3.15.0.tgz#e0b955ed908039f734805e852b58dec4b159adc9" + integrity sha512-zgBOUNy81aJplfc3NKDJMv8HkXjBGzaFF3XDzNfW8vJ7nD9rcTRUN5SQ1XCEnKMv12B/Euk9zt6kd+tX0wk1vQ== + dependencies: + "@react-aria/form" "^3.0.11" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/listbox" "^3.13.6" + "@react-aria/menu" "^3.16.0" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-aria/visually-hidden" "^3.8.18" + "@react-stately/select" "^3.6.9" + "@react-types/button" "^3.10.1" + "@react-types/select" "^3.9.8" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/selection@^3.20.0", "@react-aria/selection@^3.20.1": - version "3.20.1" - resolved "https://registry.yarnpkg.com/@react-aria/selection/-/selection-3.20.1.tgz#94b405214ea8506410f632fd2bfe470b9360ebfb" - integrity 
sha512-My0w8UC/7PAkz/1yZUjr2VRuzDZz1RrbgTqP36j5hsJx8RczDTjI4TmKtQNKG0ggaP4w83G2Og5JPTq3w3LMAw== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/selection" "^3.17.0" - "@react-types/shared" "^3.25.0" +"@react-aria/selection@^3.20.0", "@react-aria/selection@^3.21.0": + version "3.21.0" + resolved "https://registry.yarnpkg.com/@react-aria/selection/-/selection-3.21.0.tgz#c5660e73a38db5e3e1cdc722e408b4489f5f589a" + integrity sha512-52JJ6hlPcM+gt0VV3DBmz6Kj1YAJr13TfutrKfGWcK36LvNCBm1j0N+TDqbdnlp8Nue6w0+5FIwZq44XPYiBGg== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-stately/selection" "^3.18.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/separator@^3.4.3": - version "3.4.3" - resolved "https://registry.yarnpkg.com/@react-aria/separator/-/separator-3.4.3.tgz#c13b06d185ecea3e69260428b0cbdee4ae7e8f18" - integrity sha512-L+eCmSGfRJ9jScHZqBkmOkp44LBARisDjRdYbGrLlsAEcOiHUXufnfpxz2rgkUGBdUgnI9hIk12q5kdy0UxGjg== + version "3.4.4" + resolved "https://registry.yarnpkg.com/@react-aria/separator/-/separator-3.4.4.tgz#7975177d256d8e864625d9823bf7a6de5a6b6460" + integrity sha512-dH+qt0Mdh0nhKXCHW6AR4DF8DKLUBP26QYWaoThPdBwIpypH/JVKowpPtWms1P4b36U6XzHXHnTTEn/ZVoCqNA== dependencies: - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/slider@^3.7.12", "@react-aria/slider@^3.7.13": - version "3.7.13" - resolved "https://registry.yarnpkg.com/@react-aria/slider/-/slider-3.7.13.tgz#ddd0e08faf5c16cb90176d4bf8663cd7d1d43bce" - integrity sha512-yGlIpoOUKUoP0M3iI8ZHU001NASBOeZJSIQNfoS7HiqSR3bz+6BX7DRAM6B+CPHJleUtrdQ6JjO/8V8ZUV2kNQ== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/i18n" "^3.12.3" - 
"@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/utils" "^3.25.3" - "@react-stately/slider" "^3.5.8" - "@react-types/shared" "^3.25.0" - "@react-types/slider" "^3.7.6" +"@react-aria/slider@^3.7.12", "@react-aria/slider@^3.7.14": + version "3.7.14" + resolved "https://registry.yarnpkg.com/@react-aria/slider/-/slider-3.7.14.tgz#25a362725d6cd71e9b86477362a36c847c73384e" + integrity sha512-7rOiKjLkEZ0j7mPMlwrqivc+K4OSfL14slaQp06GHRiJkhiWXh2/drPe15hgNq55HmBQBpA0umKMkJcqVgmXPA== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/utils" "^3.26.0" + "@react-stately/slider" "^3.6.0" + "@react-types/shared" "^3.26.0" + "@react-types/slider" "^3.7.7" "@swc/helpers" "^0.5.0" -"@react-aria/spinbutton@^3.6.9": - version "3.6.9" - resolved "https://registry.yarnpkg.com/@react-aria/spinbutton/-/spinbutton-3.6.9.tgz#9a74c0ec927d0a7949463efcddb5c52fc9841fa8" - integrity sha512-m+uVJdiIc2LrLVDGjU7p8P2O2gUvTN26GR+NgH4rl+tUSuAB0+T1rjls/C+oXEqQjCpQihEB9Bt4M+VHpzmyjA== - dependencies: - "@react-aria/i18n" "^3.12.3" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/utils" "^3.25.3" - "@react-types/button" "^3.10.0" - "@react-types/shared" "^3.25.0" +"@react-aria/spinbutton@^3.6.10": + version "3.6.10" + resolved "https://registry.yarnpkg.com/@react-aria/spinbutton/-/spinbutton-3.6.10.tgz#72154873de807638e17570bd57e8491912b613b7" + integrity sha512-nhYEYk7xUNOZDaqiQ5w/nHH9ouqjJbabTWXH+KK7UR1oVGfo4z1wG94l8KWF3Z6SGGnBxzLJyTBguZ4g9aYTSg== + dependencies: + "@react-aria/i18n" "^3.12.4" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/utils" "^3.26.0" + "@react-types/button" "^3.10.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/ssr@^3.9.6": - version "3.9.6" - resolved "https://registry.yarnpkg.com/@react-aria/ssr/-/ssr-3.9.6.tgz#a9e8b351acdc8238f2b5215b0ce904636c6ea690" - integrity 
sha512-iLo82l82ilMiVGy342SELjshuWottlb5+VefO3jOQqQRNYnJBFpUSadswDPbRimSgJUZuFwIEYs6AabkP038fA== +"@react-aria/ssr@^3.9.6", "@react-aria/ssr@^3.9.7": + version "3.9.7" + resolved "https://registry.yarnpkg.com/@react-aria/ssr/-/ssr-3.9.7.tgz#d89d129f7bbc5148657e6c952ac31c9353183770" + integrity sha512-GQygZaGlmYjmYM+tiNBA5C6acmiDWF52Nqd40bBp0Znk4M4hP+LTmI0lpI1BuKMw45T8RIhrAsICIfKwZvi2Gg== dependencies: "@swc/helpers" "^0.5.0" "@react-aria/switch@^3.6.8": - version "3.6.9" - resolved "https://registry.yarnpkg.com/@react-aria/switch/-/switch-3.6.9.tgz#66a299fccb8efdfb474c9ed4a99b124d7511572c" - integrity sha512-w7xIywpR6llm22DXYOObZ2Uqvsw+gNmxdJ86h8+YRtpSkFnPMhXtTMv3RXpEGYhPTt/YDIqfxiluF1E2IHGwIA== + version "3.6.10" + resolved "https://registry.yarnpkg.com/@react-aria/switch/-/switch-3.6.10.tgz#8fa5729bc4e76ac3df51389a8996873142daedb8" + integrity sha512-FtaI9WaEP1tAmra1sYlAkYXg9x75P5UtgY8pSbe9+1WRyWbuE1QZT+RNCTi3IU4fZ7iJQmXH6+VaMyzPlSUagw== dependencies: - "@react-aria/toggle" "^3.10.9" - "@react-stately/toggle" "^3.7.8" - "@react-types/shared" "^3.25.0" - "@react-types/switch" "^3.5.6" + "@react-aria/toggle" "^3.10.10" + "@react-stately/toggle" "^3.8.0" + "@react-types/shared" "^3.26.0" + "@react-types/switch" "^3.5.7" "@swc/helpers" "^0.5.0" "@react-aria/table@^3.15.4": - version "3.15.5" - resolved "https://registry.yarnpkg.com/@react-aria/table/-/table-3.15.5.tgz#241a93e2c8c1dd55b77676a2d19bb86ea6ff814a" - integrity sha512-bdNZF0ZoNOfyOEIK/ctv0llacaCNk8mv+GGy8mwh5bZeJjd8KuDIpYQtZJYvf2YVvPYRWyXRhF0/B229m65f/g== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/grid" "^3.10.5" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/live-announcer" "^3.4.0" - "@react-aria/utils" "^3.25.3" - "@react-aria/visually-hidden" "^3.8.17" - "@react-stately/collections" "^3.11.0" - "@react-stately/flags" "^3.0.4" - "@react-stately/table" "^3.12.3" - "@react-types/checkbox" "^3.8.4" - "@react-types/grid" "^3.2.9" - 
"@react-types/shared" "^3.25.0" - "@react-types/table" "^3.10.2" + version "3.16.0" + resolved "https://registry.yarnpkg.com/@react-aria/table/-/table-3.16.0.tgz#f0ffb51f52494e68f2c3b81fba44278fbdc48c28" + integrity sha512-9xF9S3CJ7XRiiK92hsIKxPedD0kgcQWwqTMtj3IBynpQ4vsnRiW3YNIzrn9C3apjknRZDTSta8O2QPYCUMmw2A== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/grid" "^3.11.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/live-announcer" "^3.4.1" + "@react-aria/utils" "^3.26.0" + "@react-aria/visually-hidden" "^3.8.18" + "@react-stately/collections" "^3.12.0" + "@react-stately/flags" "^3.0.5" + "@react-stately/table" "^3.13.0" + "@react-types/checkbox" "^3.9.0" + "@react-types/grid" "^3.2.10" + "@react-types/shared" "^3.26.0" + "@react-types/table" "^3.10.3" "@swc/helpers" "^0.5.0" "@react-aria/tabs@^3.9.6": - version "3.9.7" - resolved "https://registry.yarnpkg.com/@react-aria/tabs/-/tabs-3.9.7.tgz#2e44203e51f8b953cf3cc2dc2867b27cafb40a33" - integrity sha512-f78P2Y9ZCYtwOnteku9mPVIk21xSSREYWaQPtA9ebSgVbeR5ya6RpaX9ISc9cd0HEF3Av+hZYyS1pNXXWymv9g== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/i18n" "^3.12.3" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/tabs" "^3.6.10" - "@react-types/shared" "^3.25.0" - "@react-types/tabs" "^3.3.10" + version "3.9.8" + resolved "https://registry.yarnpkg.com/@react-aria/tabs/-/tabs-3.9.8.tgz#a0a647a4e2d1783125779473536419fd8caa9cfa" + integrity sha512-Nur/qRFBe+Zrt4xcCJV/ULXCS3Mlae+B89bp1Gl20vSDqk6uaPtGk+cS5k03eugOvas7AQapqNJsJgKd66TChw== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/tabs" "^3.7.0" + "@react-types/shared" "^3.26.0" + "@react-types/tabs" "^3.3.11" "@swc/helpers" "^0.5.0" "@react-aria/tag@^3.4.6": - version "3.4.7" - resolved 
"https://registry.yarnpkg.com/@react-aria/tag/-/tag-3.4.7.tgz#1b3e527cc9d7d948c67514cde64d82b7c2276e7f" - integrity sha512-hreVvphUeYUfMN6gjM3+WouN2P/WGuR0rGpOrFk2HEnGDPg3Ar0isfdAaciTSBOc26CDKNgrmzRguxCmKKuqgw== - dependencies: - "@react-aria/gridlist" "^3.9.5" - "@react-aria/i18n" "^3.12.3" - "@react-aria/interactions" "^3.22.4" - "@react-aria/label" "^3.7.12" - "@react-aria/selection" "^3.20.1" - "@react-aria/utils" "^3.25.3" - "@react-stately/list" "^3.11.0" - "@react-types/button" "^3.10.0" - "@react-types/shared" "^3.25.0" + version "3.4.8" + resolved "https://registry.yarnpkg.com/@react-aria/tag/-/tag-3.4.8.tgz#856899a53c2be2b8aea3d5aca020edf8608246b2" + integrity sha512-exWl52bsFtJuzaqMYvSnLteUoPqb3Wf+uICru/yRtREJsWVqjJF38NCVlU73Yqd9qMPTctDrboSZFAWAWKDxoA== + dependencies: + "@react-aria/gridlist" "^3.10.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/interactions" "^3.22.5" + "@react-aria/label" "^3.7.13" + "@react-aria/selection" "^3.21.0" + "@react-aria/utils" "^3.26.0" + "@react-stately/list" "^3.11.1" + "@react-types/button" "^3.10.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-aria/textfield@^3.14.10", "@react-aria/textfield@^3.14.9": - version "3.14.10" - resolved "https://registry.yarnpkg.com/@react-aria/textfield/-/textfield-3.14.10.tgz#a6fab9993f26f97b27c5bba1a23d757d09c43d62" - integrity sha512-vG44FgxwfJUF2S6tRG+Sg646DDEgs0CO9RYniafEOHz8rwcNIH3lML7n8LAfzQa+BjBY28+UF0wmqEvd6VCzCQ== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/form" "^3.0.10" - "@react-aria/label" "^3.7.12" - "@react-aria/utils" "^3.25.3" - "@react-stately/form" "^3.0.6" - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" - "@react-types/textfield" "^3.9.7" +"@react-aria/textfield@^3.14.9", "@react-aria/textfield@^3.15.0": + version "3.15.0" + resolved "https://registry.yarnpkg.com/@react-aria/textfield/-/textfield-3.15.0.tgz#17ebac0b73f084622aaf9697576b82155bed67cb" + integrity 
sha512-V5mg7y1OR6WXYHdhhm4FC7QyGc9TideVRDFij1SdOJrIo5IFB7lvwpOS0GmgwkVbtr71PTRMjZnNbrJUFU6VNA== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/form" "^3.0.11" + "@react-aria/label" "^3.7.13" + "@react-aria/utils" "^3.26.0" + "@react-stately/form" "^3.1.0" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" + "@react-types/textfield" "^3.10.0" "@swc/helpers" "^0.5.0" -"@react-aria/toggle@^3.10.9": - version "3.10.9" - resolved "https://registry.yarnpkg.com/@react-aria/toggle/-/toggle-3.10.9.tgz#6876cdc963c311d73a11e33031a905b47927063b" - integrity sha512-dtfnyIU2/kcH9rFAiB48diSmaXDv45K7UCuTkMQLjbQa3QHC1oYNbleVN/VdGyAMBsIWtfl8L4uuPrAQmDV/bg== +"@react-aria/toggle@^3.10.10": + version "3.10.10" + resolved "https://registry.yarnpkg.com/@react-aria/toggle/-/toggle-3.10.10.tgz#444658bf606e6d56b1fd96737d5a552c93493979" + integrity sha512-QwMT/vTNrbrILxWVHfd9zVQ3mV2NdBwyRu+DphVQiFAXcmc808LEaIX2n0lI6FCsUDC9ZejCyvzd91/YemdZ1Q== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-stately/toggle" "^3.8.0" + "@react-types/checkbox" "^3.9.0" + "@react-types/shared" "^3.26.0" + "@swc/helpers" "^0.5.0" + +"@react-aria/toolbar@3.0.0-beta.11": + version "3.0.0-beta.11" + resolved "https://registry.yarnpkg.com/@react-aria/toolbar/-/toolbar-3.0.0-beta.11.tgz#019c9ff2a47ad207504a95afeb0f863cf71a114b" + integrity sha512-LM3jTRFNDgoEpoL568WaiuqiVM7eynSQLJis1hV0vlVnhTd7M7kzt7zoOjzxVb5Uapz02uCp1Fsm4wQMz09qwQ== dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/toggle" "^3.7.8" - "@react-types/checkbox" "^3.8.4" - "@react-types/shared" "^3.25.0" + "@react-aria/focus" "^3.19.0" + "@react-aria/i18n" "^3.12.4" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-aria/tooltip@^3.7.8": - version "3.7.9" - resolved 
"https://registry.yarnpkg.com/@react-aria/tooltip/-/tooltip-3.7.9.tgz#8a5f0d44f9988a7ceb55e34664bda07775ae82a2" - integrity sha512-TqVJ7YqaP/enxNyA1QGr43w4nBZmOs6Hb/pROMS5afbX7gHgMVFn0lTRc6DC2cvcfgYc4WICs2QiQMniZt/E7A== - dependencies: - "@react-aria/focus" "^3.18.4" - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-stately/tooltip" "^3.4.13" - "@react-types/shared" "^3.25.0" - "@react-types/tooltip" "^3.4.12" + version "3.7.10" + resolved "https://registry.yarnpkg.com/@react-aria/tooltip/-/tooltip-3.7.10.tgz#d710532e80337e50be818dfbf2cc54d0a9b8c592" + integrity sha512-Udi3XOnrF/SYIz72jw9bgB74MG/yCOzF5pozHj2FH2HiJlchYv/b6rHByV/77IZemdlkmL/uugrv/7raPLSlnw== + dependencies: + "@react-aria/focus" "^3.19.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-stately/tooltip" "^3.5.0" + "@react-types/shared" "^3.26.0" + "@react-types/tooltip" "^3.4.13" "@swc/helpers" "^0.5.0" -"@react-aria/utils@^3.25.3": - version "3.25.3" - resolved "https://registry.yarnpkg.com/@react-aria/utils/-/utils-3.25.3.tgz#cad9bffc07b045cdc283df2cb65c18747acbf76d" - integrity sha512-PR5H/2vaD8fSq0H/UB9inNbc8KDcVmW6fYAfSWkkn+OAdhTTMVKqXXrZuZBWyFfSD5Ze7VN6acr4hrOQm2bmrA== +"@react-aria/utils@^3.25.3", "@react-aria/utils@^3.26.0": + version "3.26.0" + resolved "https://registry.yarnpkg.com/@react-aria/utils/-/utils-3.26.0.tgz#833cbfa33e75d15835b757791b3f754432d2f948" + integrity sha512-LkZouGSjjQ0rEqo4XJosS4L3YC/zzQkfRM3KoqK6fUOmUJ9t0jQ09WjiF+uOoG9u+p30AVg3TrZRUWmoTS+koQ== dependencies: - "@react-aria/ssr" "^3.9.6" - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" + "@react-aria/ssr" "^3.9.7" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" clsx "^2.0.0" -"@react-aria/visually-hidden@^3.8.16", "@react-aria/visually-hidden@^3.8.17": - version "3.8.17" - resolved 
"https://registry.yarnpkg.com/@react-aria/visually-hidden/-/visually-hidden-3.8.17.tgz#b006aad526d78a9897fcbc793e57ddfe1adbd1af" - integrity sha512-WFgny1q2CbxxU6gu46TGQXf1DjsnuSk+RBDP4M7bm1mUVZzoCp7U7AtjNmsBrWg0NejxUdgD7+7jkHHCQ91qRA== +"@react-aria/visually-hidden@^3.8.16", "@react-aria/visually-hidden@^3.8.18": + version "3.8.18" + resolved "https://registry.yarnpkg.com/@react-aria/visually-hidden/-/visually-hidden-3.8.18.tgz#13c168736944cbe19cd8917ec33a4e6f5f694119" + integrity sha512-l/0igp+uub/salP35SsNWq5mGmg3G5F5QMS1gDZ8p28n7CgjvzyiGhJbbca7Oxvaw1HRFzVl9ev+89I7moNnFQ== dependencies: - "@react-aria/interactions" "^3.22.4" - "@react-aria/utils" "^3.25.3" - "@react-types/shared" "^3.25.0" + "@react-aria/interactions" "^3.22.5" + "@react-aria/utils" "^3.26.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-oauth/google@^0.12.1": @@ -1461,468 +1459,469 @@ resolved "https://registry.yarnpkg.com/@react-oauth/google/-/google-0.12.1.tgz#b76432c3a525e9afe076f787d2ded003fcc1bee9" integrity sha512-qagsy22t+7UdkYAiT5ZhfM4StXi9PPNvw0zuwNmabrWyMKddczMtBIOARflbaIj+wHiQjnMAsZmzsUYuXeyoSg== -"@react-stately/calendar@^3.5.5": - version "3.5.5" - resolved "https://registry.yarnpkg.com/@react-stately/calendar/-/calendar-3.5.5.tgz#52249991e1e9c40921cd3d6dce727c0dd37536cb" - integrity sha512-HzaiDRhrmaYIly8hRsjjIrydLkldiw1Ws6T/130NLQOt+VPwRW/x0R+nil42mA9LZ6oV0XN0NpmG5tn7TaKRGw== +"@react-stately/calendar@^3.5.5", "@react-stately/calendar@^3.6.0": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@react-stately/calendar/-/calendar-3.6.0.tgz#c770890160c33826206a015eb7da32fe8ece81d5" + integrity sha512-GqUtOtGnwWjtNrJud8nY/ywI4VBP5byToNVRTnxbMl+gYO1Qe/uc5NG7zjwMxhb2kqSBHZFdkF0DXVqG2Ul+BA== dependencies: - "@internationalized/date" "^3.5.6" - "@react-stately/utils" "^3.10.4" - "@react-types/calendar" "^3.4.10" - "@react-types/shared" "^3.25.0" + "@internationalized/date" "^3.6.0" + "@react-stately/utils" "^3.10.5" + "@react-types/calendar" "^3.5.0" + 
"@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/checkbox@^3.6.9": - version "3.6.9" - resolved "https://registry.yarnpkg.com/@react-stately/checkbox/-/checkbox-3.6.9.tgz#e7ff459cb8fe2f57bac37ed666e8304eb70a8ed3" - integrity sha512-JrY3ecnK/SSJPxw+qhGhg3YV4e0CpUcPDrVwY3mSiAE932DPd19xr+qVCknJ34H7JYYt/q0l2z0lmgPnl96RTg== +"@react-stately/checkbox@^3.6.10", "@react-stately/checkbox@^3.6.9": + version "3.6.10" + resolved "https://registry.yarnpkg.com/@react-stately/checkbox/-/checkbox-3.6.10.tgz#69b619fdfcf1e15d2d93392e13289a36d85a8a6c" + integrity sha512-LHm7i4YI8A/RdgWAuADrnSAYIaYYpQeZqsp1a03Og0pJHAlZL0ymN3y2IFwbZueY0rnfM+yF+kWNXjJqbKrFEQ== dependencies: - "@react-stately/form" "^3.0.6" - "@react-stately/utils" "^3.10.4" - "@react-types/checkbox" "^3.8.4" - "@react-types/shared" "^3.25.0" + "@react-stately/form" "^3.1.0" + "@react-stately/utils" "^3.10.5" + "@react-types/checkbox" "^3.9.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/collections@^3.11.0": - version "3.11.0" - resolved "https://registry.yarnpkg.com/@react-stately/collections/-/collections-3.11.0.tgz#d04dd728ce4f5036a4e2830b1bbbba36aafd2ef0" - integrity sha512-TiJeJjHMPSbbeAhmCXLJNSCk0fa5XnCvEuYw6HtQzDnYiq1AD7KAwkpjC5NfKkjqF3FLXs/v9RDm/P69q6rYzw== +"@react-stately/collections@^3.11.0", "@react-stately/collections@^3.12.0": + version "3.12.0" + resolved "https://registry.yarnpkg.com/@react-stately/collections/-/collections-3.12.0.tgz#6240d3517d0d86f7d9eb4997108fb432d569e8d7" + integrity sha512-MfR9hwCxe5oXv4qrLUnjidwM50U35EFmInUeFf8i9mskYwWlRYS0O1/9PZ0oF1M0cKambaRHKEy98jczgb9ycA== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/color@^3.8.0": - version "3.8.0" - resolved "https://registry.yarnpkg.com/@react-stately/color/-/color-3.8.0.tgz#4527fff1e4e632d35f58ee84493830066f678baa" - integrity 
sha512-lBH91HEStZeayhE/FkDMt9WC0UISQiAn8DoD2hfpTGeeWscX/soyxZA7oVL7zBOG9RfDBMNzF+CybVROrWSKAQ== - dependencies: - "@internationalized/number" "^3.5.4" - "@internationalized/string" "^3.2.4" - "@react-aria/i18n" "^3.12.3" - "@react-stately/form" "^3.0.6" - "@react-stately/numberfield" "^3.9.7" - "@react-stately/slider" "^3.5.8" - "@react-stately/utils" "^3.10.4" - "@react-types/color" "^3.0.0" - "@react-types/shared" "^3.25.0" +"@react-stately/color@^3.8.0", "@react-stately/color@^3.8.1": + version "3.8.1" + resolved "https://registry.yarnpkg.com/@react-stately/color/-/color-3.8.1.tgz#13969f61f1c1b468fd891cc94582056fc2da9b9b" + integrity sha512-7eN7K+KJRu+rxK351eGrzoq2cG+yipr90i5b1cUu4lioYmcH4WdsfjmM5Ku6gypbafH+kTDfflvO6hiY1NZH+A== + dependencies: + "@internationalized/number" "^3.6.0" + "@internationalized/string" "^3.2.5" + "@react-aria/i18n" "^3.12.4" + "@react-stately/form" "^3.1.0" + "@react-stately/numberfield" "^3.9.8" + "@react-stately/slider" "^3.6.0" + "@react-stately/utils" "^3.10.5" + "@react-types/color" "^3.0.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/combobox@^3.10.0": - version "3.10.0" - resolved "https://registry.yarnpkg.com/@react-stately/combobox/-/combobox-3.10.0.tgz#33667983b8042b6dd342dcea2dd8944e834d4a90" - integrity sha512-4W4HCCjjoddW/LZM3pSSeLoV7ncYXlaICKmqlBcbtLR5jY4U5Kx+pPpy3oJ1vCdjDHatIxZ0tVKEBP7vBQVeGQ== - dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/form" "^3.0.6" - "@react-stately/list" "^3.11.0" - "@react-stately/overlays" "^3.6.11" - "@react-stately/select" "^3.6.8" - "@react-stately/utils" "^3.10.4" - "@react-types/combobox" "^3.13.0" - "@react-types/shared" "^3.25.0" +"@react-stately/combobox@^3.10.0", "@react-stately/combobox@^3.10.1": + version "3.10.1" + resolved "https://registry.yarnpkg.com/@react-stately/combobox/-/combobox-3.10.1.tgz#ebae28c5341d06d69cc8e50055fa816dee19522b" + integrity 
sha512-Rso+H+ZEDGFAhpKWbnRxRR/r7YNmYVtt+Rn0eNDNIUp3bYaxIBCdCySyAtALs4I8RZXZQ9zoUznP7YeVwG3cLg== + dependencies: + "@react-stately/collections" "^3.12.0" + "@react-stately/form" "^3.1.0" + "@react-stately/list" "^3.11.1" + "@react-stately/overlays" "^3.6.12" + "@react-stately/select" "^3.6.9" + "@react-stately/utils" "^3.10.5" + "@react-types/combobox" "^3.13.1" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" "@react-stately/data@^3.11.7": - version "3.11.7" - resolved "https://registry.yarnpkg.com/@react-stately/data/-/data-3.11.7.tgz#3c775ad3e90730abe9a2cd832aa59321f1860d26" - integrity sha512-2YJ+Lmca18f/h7jiZiU9j2IhBJl6BFO1BWlwvcCAH/eCWTdveX8gzsUdW++0szzpJaoCilTCYoi8z7QWyVH9jQ== + version "3.12.0" + resolved "https://registry.yarnpkg.com/@react-stately/data/-/data-3.12.0.tgz#4f0fd89be2fa64ca7c0ad92d8cf20f2cba961fc0" + integrity sha512-6PiW2oA56lcH1CVjDcajutzsv91w/PER8K61/OGxtNFFUWaIZH80RWmK4kjU/Lf0vygzXCxout3kEb388lUk0w== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/datepicker@^3.10.3": - version "3.10.3" - resolved "https://registry.yarnpkg.com/@react-stately/datepicker/-/datepicker-3.10.3.tgz#4c7ea07cc1e0d145c8ce1ad406ca44470e654c4c" - integrity sha512-6PJW1QMwk6BQMktV9L6DA4f2rfAdLfbq3iTNLy4qxd5IfNPLMUZiJGGTj+cuqx0WcEl+q5irp+YhKBpbmhPZHg== - dependencies: - "@internationalized/date" "^3.5.6" - "@internationalized/string" "^3.2.4" - "@react-stately/form" "^3.0.6" - "@react-stately/overlays" "^3.6.11" - "@react-stately/utils" "^3.10.4" - "@react-types/datepicker" "^3.8.3" - "@react-types/shared" "^3.25.0" +"@react-stately/datepicker@^3.10.3", "@react-stately/datepicker@^3.11.0": + version "3.11.0" + resolved "https://registry.yarnpkg.com/@react-stately/datepicker/-/datepicker-3.11.0.tgz#5f4daff449f756dc40b4201ae337dd4a3f29facc" + integrity sha512-d9MJF34A0VrhL5y5S8mAISA8uwfNCQKmR2k4KoQJm3De1J8SQeNzSjLviAwh1faDow6FXGlA6tVbTrHyDcBgBg== + dependencies: + 
"@internationalized/date" "^3.6.0" + "@internationalized/string" "^3.2.5" + "@react-stately/form" "^3.1.0" + "@react-stately/overlays" "^3.6.12" + "@react-stately/utils" "^3.10.5" + "@react-types/datepicker" "^3.9.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/dnd@^3.4.3": - version "3.4.3" - resolved "https://registry.yarnpkg.com/@react-stately/dnd/-/dnd-3.4.3.tgz#c9c82729d18e3e48a174a58d607f81458b7785ac" - integrity sha512-sUvhmMxFEw6P2MW7walx0ntakIihxdPxA06K9YZ3+ReaUvzQuRw5cFDaTTHrlegWRMYD0CyQaKlGIaTQihhvVA== +"@react-stately/dnd@^3.4.3", "@react-stately/dnd@^3.5.0": + version "3.5.0" + resolved "https://registry.yarnpkg.com/@react-stately/dnd/-/dnd-3.5.0.tgz#3061233709f7113f6492de33204aea507c243a2e" + integrity sha512-ZcWFw1npEDnATiy3TEdzA1skQ3UEIyfbNA6VhPNO8yiSVLxoxBOaEaq8VVS72fRGAtxud6dgOy8BnsP9JwDClQ== dependencies: - "@react-stately/selection" "^3.17.0" - "@react-types/shared" "^3.25.0" + "@react-stately/selection" "^3.18.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/flags@^3.0.4": - version "3.0.4" - resolved "https://registry.yarnpkg.com/@react-stately/flags/-/flags-3.0.4.tgz#ac778647733b6f7c46f4b0f907cec82f08986490" - integrity sha512-RNJEkOALwKg+JeYsfNlfPc4GXm7hiBLX0yuHOkRapWEyDOfi0cinkV/TZG4goOZdQ5tBpHmemf2qqiHAxqHlzQ== +"@react-stately/flags@^3.0.5": + version "3.0.5" + resolved "https://registry.yarnpkg.com/@react-stately/flags/-/flags-3.0.5.tgz#b35bcbd3b80c4f821e23e9c649566a4af11e97bf" + integrity sha512-6wks4csxUwPCp23LgJSnkBRhrWpd9jGd64DjcCTNB2AHIFu7Ab1W59pJpUL6TW7uAxVxdNKjgn6D1hlBy8qWsA== dependencies: "@swc/helpers" "^0.5.0" -"@react-stately/form@^3.0.6": - version "3.0.6" - resolved "https://registry.yarnpkg.com/@react-stately/form/-/form-3.0.6.tgz#788c837a7967a499366928a91738b15dd7f87d77" - integrity sha512-KMsxm3/V0iCv/6ikt4JEjVM3LW2AgCzo7aNotMzRobtwIo0RwaUo7DQNY00rGgFQ3/IjzI6DcVo13D+AVE/zXg== +"@react-stately/form@^3.0.6", "@react-stately/form@^3.1.0": + version "3.1.0" + 
resolved "https://registry.yarnpkg.com/@react-stately/form/-/form-3.1.0.tgz#7fdb4ca153be18e7516a02e507ada393ad38945d" + integrity sha512-E2wxNQ0QaTyDHD0nJFtTSnEH9A3bpJurwxhS4vgcUmESHgjFEMLlC9irUSZKgvOgb42GAq+fHoWBsgKeTp9Big== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/grid@^3.9.3": - version "3.9.3" - resolved "https://registry.yarnpkg.com/@react-stately/grid/-/grid-3.9.3.tgz#1ead7cc7b6d036c4609692eaf818a70f472ba8c8" - integrity sha512-P5KgCNYwm/n8bbLx6527li89RQWoESikrsg2MMyUpUd6IJ321t2pGONGRRQzxE0SBMolPRDJKV0Do2OlsjYKhQ== +"@react-stately/grid@^3.10.0": + version "3.10.0" + resolved "https://registry.yarnpkg.com/@react-stately/grid/-/grid-3.10.0.tgz#d74e46f85a662092c61812a8508cf4ac8721ec6d" + integrity sha512-ii+DdsOBvCnHMgL0JvUfFwO1kiAPP19Bpdpl6zn/oOltk6F5TmnoyNrzyz+2///1hCiySI3FE1O7ujsAQs7a6Q== dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/selection" "^3.17.0" - "@react-types/grid" "^3.2.9" - "@react-types/shared" "^3.25.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/selection" "^3.18.0" + "@react-types/grid" "^3.2.10" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/list@^3.11.0": - version "3.11.0" - resolved "https://registry.yarnpkg.com/@react-stately/list/-/list-3.11.0.tgz#8bb5d43f6468510562d1023a59f039052a7237f8" - integrity sha512-O+BxXcbtoLZWn4QIT54RoFUaM+QaJQm6s0ZBJ3Jv4ILIhukVOc55ra+aWMVlXFQSpbf6I3hyVP6cz1yyvd5Rtw== +"@react-stately/list@^3.11.0", "@react-stately/list@^3.11.1": + version "3.11.1" + resolved "https://registry.yarnpkg.com/@react-stately/list/-/list-3.11.1.tgz#d1493e5b9c5cac6cafb3cb3a6edb852bf3cb208f" + integrity sha512-UCOpIvqBOjwLtk7zVTYWuKU1m1Oe61Q5lNar/GwHaV1nAiSQ8/yYlhr40NkBEs9X3plEfsV28UIpzOrYnu1tPg== dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/selection" "^3.17.0" - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" + 
"@react-stately/collections" "^3.12.0" + "@react-stately/selection" "^3.18.0" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/menu@^3.8.3": - version "3.8.3" - resolved "https://registry.yarnpkg.com/@react-stately/menu/-/menu-3.8.3.tgz#88656fc799ab8591650c87711688a8ccb7986c2e" - integrity sha512-sV63V+cMgzipx/N7dq5GaXoItfXIfFEpCtlk3PM2vKstlCJalszXrdo+x996bkeU96h0plB7znAlhlXOeTKzUg== +"@react-stately/menu@^3.8.3", "@react-stately/menu@^3.9.0": + version "3.9.0" + resolved "https://registry.yarnpkg.com/@react-stately/menu/-/menu-3.9.0.tgz#b1e55996405f4e43ff844cbd325df9842914efe4" + integrity sha512-++sm0fzZeUs9GvtRbj5RwrP+KL9KPANp9f4SvtI3s+MP+Y/X3X7LNNePeeccGeyikB5fzMsuyvd82bRRW9IhDQ== dependencies: - "@react-stately/overlays" "^3.6.11" - "@react-types/menu" "^3.9.12" - "@react-types/shared" "^3.25.0" + "@react-stately/overlays" "^3.6.12" + "@react-types/menu" "^3.9.13" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/numberfield@^3.9.7": - version "3.9.7" - resolved "https://registry.yarnpkg.com/@react-stately/numberfield/-/numberfield-3.9.7.tgz#5685d108339ca627b43158630cba09e84a7f5bf4" - integrity sha512-PjSgCCpYasGCEAznFQNqa2JhhEQ5+/2eMiV7ZI5j76q3edTNF8G5OOCl2RazDbzFp6vDAnRVT7Kctx5Tl5R/Zw== +"@react-stately/numberfield@^3.9.7", "@react-stately/numberfield@^3.9.8": + version "3.9.8" + resolved "https://registry.yarnpkg.com/@react-stately/numberfield/-/numberfield-3.9.8.tgz#863a6c0f7a4249759dd0c586f2e27dd2548aadee" + integrity sha512-J6qGILxDNEtu7yvd3/y+FpbrxEaAeIODwlrFo6z1kvuDlLAm/KszXAc75yoDi0OtakFTCMP6/HR5VnHaQdMJ3w== dependencies: - "@internationalized/number" "^3.5.4" - "@react-stately/form" "^3.0.6" - "@react-stately/utils" "^3.10.4" - "@react-types/numberfield" "^3.8.6" + "@internationalized/number" "^3.6.0" + "@react-stately/form" "^3.1.0" + "@react-stately/utils" "^3.10.5" + "@react-types/numberfield" "^3.8.7" "@swc/helpers" "^0.5.0" -"@react-stately/overlays@^3.6.11": - 
version "3.6.11" - resolved "https://registry.yarnpkg.com/@react-stately/overlays/-/overlays-3.6.11.tgz#67d413853d47d49ed2687c6b74b1749f4b26da6e" - integrity sha512-usuxitwOx4FbmOW7Og4VM8R8ZjerbHZLLbFaxZW7pWLs7Ypway1YhJ3SWcyNTYK7NEk4o602kSoU6MSev1Vgag== +"@react-stately/overlays@^3.6.11", "@react-stately/overlays@^3.6.12": + version "3.6.12" + resolved "https://registry.yarnpkg.com/@react-stately/overlays/-/overlays-3.6.12.tgz#beb594a0e140dbd7957bfa181006854f91480bea" + integrity sha512-QinvZhwZgj8obUyPIcyURSCjTZlqZYRRCS60TF8jH8ZpT0tEAuDb3wvhhSXuYA3Xo9EHLwvLjEf3tQKKdAQArw== dependencies: - "@react-stately/utils" "^3.10.4" - "@react-types/overlays" "^3.8.10" + "@react-stately/utils" "^3.10.5" + "@react-types/overlays" "^3.8.11" "@swc/helpers" "^0.5.0" -"@react-stately/radio@^3.10.8": - version "3.10.8" - resolved "https://registry.yarnpkg.com/@react-stately/radio/-/radio-3.10.8.tgz#1b6e146dc402ce9de05176be6fd75ed260a674c5" - integrity sha512-VRq6Gzsbk3jzX6hdrSoDoSra9vLRsOi2pLkvW/CMrJ0GSgMwr8jjvJKnNFvYJ3eYQb20EwkarsOAfk7vPSIt/Q== +"@react-stately/radio@^3.10.8", "@react-stately/radio@^3.10.9": + version "3.10.9" + resolved "https://registry.yarnpkg.com/@react-stately/radio/-/radio-3.10.9.tgz#cf74b8f47cbef56836424d2e7d06c01fe9d9ea05" + integrity sha512-kUQ7VdqFke8SDRCatw2jW3rgzMWbvw+n2imN2THETynI47NmNLzNP11dlGO2OllRtTrsLhmBNlYHa3W62pFpAw== dependencies: - "@react-stately/form" "^3.0.6" - "@react-stately/utils" "^3.10.4" - "@react-types/radio" "^3.8.4" - "@react-types/shared" "^3.25.0" + "@react-stately/form" "^3.1.0" + "@react-stately/utils" "^3.10.5" + "@react-types/radio" "^3.8.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/searchfield@^3.5.7": - version "3.5.7" - resolved "https://registry.yarnpkg.com/@react-stately/searchfield/-/searchfield-3.5.7.tgz#7cfe3ec976f59cbda038bef775abe66e3b5bbf15" - integrity sha512-VxEG4tWDypdXQ8f7clZBu5Qmc4osqDBeA/gNMA2i1j/h2zRVcCJ0fRCHuDeXLSWBqF1XXAI4TWV53fBBwJusbg== 
+"@react-stately/searchfield@^3.5.7", "@react-stately/searchfield@^3.5.8": + version "3.5.8" + resolved "https://registry.yarnpkg.com/@react-stately/searchfield/-/searchfield-3.5.8.tgz#b927e25afd32b1a32f22d54df6d7b4968cb4cf33" + integrity sha512-jtquvGadx1DmtQqPKaVO6Qg/xpBjNxsOd59ciig9xRxpxV+90i996EX1E2R6R+tGJdSM1pD++7PVOO4yE++HOg== dependencies: - "@react-stately/utils" "^3.10.4" - "@react-types/searchfield" "^3.5.9" + "@react-stately/utils" "^3.10.5" + "@react-types/searchfield" "^3.5.10" "@swc/helpers" "^0.5.0" -"@react-stately/select@^3.6.8": - version "3.6.8" - resolved "https://registry.yarnpkg.com/@react-stately/select/-/select-3.6.8.tgz#37d331a4cf248c428aad5fc1349a7cdcae12f89b" - integrity sha512-fLAVzGeYSdYdBdrEVws6Pb1ywFPdapA0eWphoW5s3fS0/pKcVWwbCHeHlaBEi1ISyqEubQZFGQdeFKm/M46Hew== - dependencies: - "@react-stately/form" "^3.0.6" - "@react-stately/list" "^3.11.0" - "@react-stately/overlays" "^3.6.11" - "@react-types/select" "^3.9.7" - "@react-types/shared" "^3.25.0" +"@react-stately/select@^3.6.8", "@react-stately/select@^3.6.9": + version "3.6.9" + resolved "https://registry.yarnpkg.com/@react-stately/select/-/select-3.6.9.tgz#088bb0fe7b16cc67d833f17c330df63c4310a5da" + integrity sha512-vASUDv7FhEYQURzM+JIwcusPv7/x/l3zHc/oKJPvoCl3aa9pwS8hZwS82SC00o2iFnrDscfDJju4IE/cd4hucg== + dependencies: + "@react-stately/form" "^3.1.0" + "@react-stately/list" "^3.11.1" + "@react-stately/overlays" "^3.6.12" + "@react-types/select" "^3.9.8" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/selection@^3.17.0": - version "3.17.0" - resolved "https://registry.yarnpkg.com/@react-stately/selection/-/selection-3.17.0.tgz#92ada2cfe00bf47a5c4a53b2809c6703d71a9798" - integrity sha512-It3LRTaFOavybuDBvBH2mvCh73OL4awqvN4tZ0JzLzMtaYSBe9+YmFasYrzB0o7ca17B2q1tpUmsNWaAgIqbLA== +"@react-stately/selection@^3.17.0", "@react-stately/selection@^3.18.0": + version "3.18.0" + resolved 
"https://registry.yarnpkg.com/@react-stately/selection/-/selection-3.18.0.tgz#eb723dc26eafd9b016c55056fb550bdde8b4f87b" + integrity sha512-6EaNNP3exxBhW2LkcRR4a3pg+3oDguZlBSqIVVR7lyahv/D8xXHRC4dX+m0mgGHJpsgjs7664Xx6c8v193TFxg== dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/slider@^3.5.8": - version "3.5.8" - resolved "https://registry.yarnpkg.com/@react-stately/slider/-/slider-3.5.8.tgz#b5370f2bc0b8833a191b9ce1e0d48e49d4b44972" - integrity sha512-EDgbrxMq1w3+XTN72MGl3YtAG/j65EYX1Uc3Fh56K00+inJbTdRWyYTrb3NA310fXCd0WFBbzExuH2ohlKQycg== +"@react-stately/slider@^3.5.8", "@react-stately/slider@^3.6.0": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@react-stately/slider/-/slider-3.6.0.tgz#20439e08915725c4f6ba2285a561ae92fe59d997" + integrity sha512-w5vJxVh267pmD1X+Ppd9S3ZzV1hcg0cV8q5P4Egr160b9WMcWlUspZPtsthwUlN7qQe/C8y5IAhtde4s29eNag== dependencies: - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" - "@react-types/slider" "^3.7.6" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" + "@react-types/slider" "^3.7.7" "@swc/helpers" "^0.5.0" -"@react-stately/table@^3.12.3": - version "3.12.3" - resolved "https://registry.yarnpkg.com/@react-stately/table/-/table-3.12.3.tgz#aae5fa267af2de6ed77a79b8482758ffdd318e80" - integrity sha512-8uGrLcNJYeMbFtzRQZFWCBj5kV+7v3jzwoKIL1j9TmYUKow1PTDMQbPJpAZLQhnC2wVMlaFVgDbedSlbBij7Zg== - dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/flags" "^3.0.4" - "@react-stately/grid" "^3.9.3" - "@react-stately/selection" "^3.17.0" - "@react-stately/utils" "^3.10.4" - "@react-types/grid" "^3.2.9" - "@react-types/shared" "^3.25.0" - "@react-types/table" "^3.10.2" +"@react-stately/table@^3.12.3", "@react-stately/table@^3.13.0": + version "3.13.0" + resolved 
"https://registry.yarnpkg.com/@react-stately/table/-/table-3.13.0.tgz#8465030f568b5ee779623d99b2ef22940a99a6cd" + integrity sha512-mRbNYrwQIE7xzVs09Lk3kPteEVFVyOc20vA8ph6EP54PiUf/RllJpxZe/WUYLf4eom9lUkRYej5sffuUBpxjCA== + dependencies: + "@react-stately/collections" "^3.12.0" + "@react-stately/flags" "^3.0.5" + "@react-stately/grid" "^3.10.0" + "@react-stately/selection" "^3.18.0" + "@react-stately/utils" "^3.10.5" + "@react-types/grid" "^3.2.10" + "@react-types/shared" "^3.26.0" + "@react-types/table" "^3.10.3" "@swc/helpers" "^0.5.0" -"@react-stately/tabs@^3.6.10": - version "3.6.10" - resolved "https://registry.yarnpkg.com/@react-stately/tabs/-/tabs-3.6.10.tgz#89543e109e473b308ec12f2bc62ba99c74038f70" - integrity sha512-F7wfoiNsrBy7c02AYHyE1USGgj05HQ0hp7uXmQjp2LEa+AA0NKKi3HdswTHHySxb0ZRuoEE7E7vp/gXQYx2/Ow== +"@react-stately/tabs@^3.6.10", "@react-stately/tabs@^3.7.0": + version "3.7.0" + resolved "https://registry.yarnpkg.com/@react-stately/tabs/-/tabs-3.7.0.tgz#f849b334c5e7d39a37c2e9ffa3114531bf8ce6e4" + integrity sha512-ox4hTkfZCoR4Oyr3Op3rBlWNq2Wxie04vhEYpTZQ2hobR3l4fYaOkd7CPClILktJ3TC104j8wcb0knWxIBRx9w== dependencies: - "@react-stately/list" "^3.11.0" - "@react-types/shared" "^3.25.0" - "@react-types/tabs" "^3.3.10" + "@react-stately/list" "^3.11.1" + "@react-types/shared" "^3.26.0" + "@react-types/tabs" "^3.3.11" "@swc/helpers" "^0.5.0" -"@react-stately/toggle@^3.7.8": - version "3.7.8" - resolved "https://registry.yarnpkg.com/@react-stately/toggle/-/toggle-3.7.8.tgz#14bcc21a50c7196c084542aa83584db86c911d13" - integrity sha512-ySOtkByvIY54yIu8IZ4lnvomQA0H+/mkZnd6T5fKN3tjvIzHmkUk3TAPmNInUxHX148tSW6mWwec0xvjYqEd6w== +"@react-stately/toggle@^3.7.8", "@react-stately/toggle@^3.8.0": + version "3.8.0" + resolved "https://registry.yarnpkg.com/@react-stately/toggle/-/toggle-3.8.0.tgz#39a3e45989f56e236809d8fe69c160cc88a616f5" + integrity sha512-pyt/k/J8BwE/2g6LL6Z6sMSWRx9HEJB83Sm/MtovXnI66sxJ2EfQ1OaXB7Su5PEL9OMdoQF6Mb+N1RcW3zAoPw== dependencies: - 
"@react-stately/utils" "^3.10.4" - "@react-types/checkbox" "^3.8.4" + "@react-stately/utils" "^3.10.5" + "@react-types/checkbox" "^3.9.0" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/tooltip@^3.4.13": - version "3.4.13" - resolved "https://registry.yarnpkg.com/@react-stately/tooltip/-/tooltip-3.4.13.tgz#72381ef42afd6731d4a1b51c30632bb8d07f1757" - integrity sha512-zQ+8FQ7Pi0Cz852dltXb6yaryjE18K3byK4tIO3e5vnrZHEGvfdxowc+v9ak5UV93kVrYoOVmfZHRcEaTXTBNA== +"@react-stately/tooltip@^3.4.13", "@react-stately/tooltip@^3.5.0": + version "3.5.0" + resolved "https://registry.yarnpkg.com/@react-stately/tooltip/-/tooltip-3.5.0.tgz#1016952eb4427d5b848e2efcb24eee47e2a26b59" + integrity sha512-+xzPNztJDd2XJD0X3DgWKlrgOhMqZpSzsIssXeJgO7uCnP8/Z513ESaipJhJCFC8fxj5caO/DK4Uu8hEtlB8cQ== dependencies: - "@react-stately/overlays" "^3.6.11" - "@react-types/tooltip" "^3.4.12" + "@react-stately/overlays" "^3.6.12" + "@react-types/tooltip" "^3.4.13" "@swc/helpers" "^0.5.0" -"@react-stately/tree@^3.8.5": - version "3.8.5" - resolved "https://registry.yarnpkg.com/@react-stately/tree/-/tree-3.8.5.tgz#4c08d29c75a809265c7ead00b78e26d10bb7aea7" - integrity sha512-0/tYhsKWQQJTOZFDwh8hY3Qk6ejNFRldGrLeK5kS22UZdvsMFyh7WAi40FTCJy561/VoB0WqQI4oyNPOa9lYWg== +"@react-stately/tree@^3.8.5", "@react-stately/tree@^3.8.6": + version "3.8.6" + resolved "https://registry.yarnpkg.com/@react-stately/tree/-/tree-3.8.6.tgz#85dc33c5d5b9a455ffc0b474300957e511db1ea4" + integrity sha512-lblUaxf1uAuIz5jm6PYtcJ+rXNNVkqyFWTIMx6g6gW/mYvm8GNx1G/0MLZE7E6CuDGaO9dkLSY2bB1uqyKHidA== dependencies: - "@react-stately/collections" "^3.11.0" - "@react-stately/selection" "^3.17.0" - "@react-stately/utils" "^3.10.4" - "@react-types/shared" "^3.25.0" + "@react-stately/collections" "^3.12.0" + "@react-stately/selection" "^3.18.0" + "@react-stately/utils" "^3.10.5" + "@react-types/shared" "^3.26.0" "@swc/helpers" "^0.5.0" -"@react-stately/utils@^3.10.4": - version "3.10.4" - resolved 
"https://registry.yarnpkg.com/@react-stately/utils/-/utils-3.10.4.tgz#310663a834b67048d305e1680ed258130092fe51" - integrity sha512-gBEQEIMRh5f60KCm7QKQ2WfvhB2gLUr9b72sqUdIZ2EG+xuPgaIlCBeSicvjmjBvYZwOjoOEnmIkcx2GHp/HWw== +"@react-stately/utils@^3.10.5": + version "3.10.5" + resolved "https://registry.yarnpkg.com/@react-stately/utils/-/utils-3.10.5.tgz#47bb91cd5afd1bafe39353614e5e281b818ebccc" + integrity sha512-iMQSGcpaecghDIh3mZEpZfoFH3ExBwTtuBEcvZ2XnGzCgQjeYXcMdIUwAfVQLXFTdHUHGF6Gu6/dFrYsCzySBQ== dependencies: "@swc/helpers" "^0.5.0" -"@react-types/breadcrumbs@^3.7.8": - version "3.7.8" - resolved "https://registry.yarnpkg.com/@react-types/breadcrumbs/-/breadcrumbs-3.7.8.tgz#edcde11c06bad19008a066fa0a91eb5fda9723f5" - integrity sha512-+BW2a+PrY8ArZ+pKecz13oJFrUAhthvXx17o3x0BhWUhRpAdtmTYt2hjw8zNanm2j0Kvgo1HYKgvtskCRxYcOA== +"@react-types/breadcrumbs@^3.7.9": + version "3.7.9" + resolved "https://registry.yarnpkg.com/@react-types/breadcrumbs/-/breadcrumbs-3.7.9.tgz#c75eae6158bd3631854bff7521c2373b42b0e37c" + integrity sha512-eARYJo8J+VfNV8vP4uw3L2Qliba9wLV2bx9YQCYf5Lc/OE5B/y4gaTLz+Y2P3Rtn6gBPLXY447zCs5i7gf+ICg== dependencies: - "@react-types/link" "^3.5.8" - "@react-types/shared" "^3.25.0" + "@react-types/link" "^3.5.9" + "@react-types/shared" "^3.26.0" -"@react-types/button@^3.10.0": - version "3.10.0" - resolved "https://registry.yarnpkg.com/@react-types/button/-/button-3.10.0.tgz#5044648401f9842c47a433c66180a5a520cc29af" - integrity sha512-rAyU+N9VaHLBdZop4zasn8IDwf9I5Q1EzHUKMtzIFf5aUlMUW+K460zI/l8UESWRSWAXK9/WPSXGxfcoCEjvAA== +"@react-types/button@^3.10.1": + version "3.10.1" + resolved "https://registry.yarnpkg.com/@react-types/button/-/button-3.10.1.tgz#fc7ada2e83bc661b31c1473a82ec86dc11de057c" + integrity sha512-XTtap8o04+4QjPNAshFWOOAusUTxQlBjU2ai0BTVLShQEjHhRVDBIWsI2B2FKJ4KXT6AZ25llaxhNrreWGonmA== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/calendar@^3.4.10": - version "3.4.10" - resolved 
"https://registry.yarnpkg.com/@react-types/calendar/-/calendar-3.4.10.tgz#65011c31fb497e25bd98d19c84da3b8d63d5a3aa" - integrity sha512-PyjqxwJxSW2IpQx6y0D9O34fRCWn1gv9q0qFhgaIigIQrPg8zTE/CC7owHLxAtgCnnCt8exJ5rqi414csaHKlA== +"@react-types/calendar@^3.5.0": + version "3.5.0" + resolved "https://registry.yarnpkg.com/@react-types/calendar/-/calendar-3.5.0.tgz#a72fa15e08c7785b145005560baa35ad9b44627b" + integrity sha512-O3IRE7AGwAWYnvJIJ80cOy7WwoJ0m8GtX/qSmvXQAjC4qx00n+b5aFNBYAQtcyc3RM5QpW6obs9BfwGetFiI8w== dependencies: - "@internationalized/date" "^3.5.6" - "@react-types/shared" "^3.25.0" + "@internationalized/date" "^3.6.0" + "@react-types/shared" "^3.26.0" -"@react-types/checkbox@^3.8.4": - version "3.8.4" - resolved "https://registry.yarnpkg.com/@react-types/checkbox/-/checkbox-3.8.4.tgz#a51b90025fd362d8b755d8a95640a0134124f688" - integrity sha512-fvZrlQmlFNsYHZpl7GVmyYQlKdUtO5MczMSf8z3TlSiCb5Kl3ha9PsZgLhJqGuVnzB2ArIBz0eZrYa3k0PhcpA== +"@react-types/checkbox@^3.9.0": + version "3.9.0" + resolved "https://registry.yarnpkg.com/@react-types/checkbox/-/checkbox-3.9.0.tgz#d4621fef81850543f7a028917e9c2781cd871443" + integrity sha512-9hbHx0Oo2Hp5a8nV8Q75LQR0DHtvOIJbFaeqESSopqmV9EZoYjtY/h0NS7cZetgahQgnqYWQi44XGooMDCsmxA== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/color@^3.0.0": - version "3.0.0" - resolved "https://registry.yarnpkg.com/@react-types/color/-/color-3.0.0.tgz#85965ce25e5d86e544b9be3603fd59c60c087d3c" - integrity sha512-VUH8CROAM69GsMBilrJ1xyAdVsWL01nXQYrkZJxAEApv1OrcpIGSdsXLcGrjsrhjjiNVXxWFnqYRMsKkLzIl7g== +"@react-types/color@^3.0.1": + version "3.0.1" + resolved "https://registry.yarnpkg.com/@react-types/color/-/color-3.0.1.tgz#42d49736cf606b8434d3fe0ab60b75bb0f44f554" + integrity sha512-KemFziO3GbmT3HEKrgOGdqNA6Gsmy9xrwFO3f8qXSG7gVz6M27Ic4R9HVQv4iAjap5uti6W13/pk2bc/jLVcEA== dependencies: - "@react-types/shared" "^3.25.0" - "@react-types/slider" "^3.7.6" + "@react-types/shared" "^3.26.0" + 
"@react-types/slider" "^3.7.7" -"@react-types/combobox@^3.13.0": - version "3.13.0" - resolved "https://registry.yarnpkg.com/@react-types/combobox/-/combobox-3.13.0.tgz#2969ae4121c6d4c7a9e843530f60d9da6b520f49" - integrity sha512-kH/a+Fjpr54M2JbHg9RXwMjZ9O+XVsdOuE5JCpWRibJP1Mfl1md8gY6y6zstmVY8COrSqFvMZWB+PzwaTWjTGw== +"@react-types/combobox@^3.13.1": + version "3.13.1" + resolved "https://registry.yarnpkg.com/@react-types/combobox/-/combobox-3.13.1.tgz#d7d843f45501ad141f74ba62ed46d2e991b2d6a0" + integrity sha512-7xr+HknfhReN4QPqKff5tbKTe2kGZvH+DGzPYskAtb51FAAiZsKo+WvnNAvLwg3kRoC9Rkn4TAiVBp/HgymRDw== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/datepicker@^3.8.3": - version "3.8.3" - resolved "https://registry.yarnpkg.com/@react-types/datepicker/-/datepicker-3.8.3.tgz#3d54003a92c3a37f35309373ddbb9c663135b631" - integrity sha512-Y4qfPRBB6uzocosCOWSYMuwiZ3YXwLWQYiFB4KCglkvHyltbNz76LgoBEnclYA5HjwosIk4XywiXvHSYry8JnQ== +"@react-types/datepicker@^3.9.0": + version "3.9.0" + resolved "https://registry.yarnpkg.com/@react-types/datepicker/-/datepicker-3.9.0.tgz#86e2a4e23e9fbf8299a12bd8aba9b1a52cf44725" + integrity sha512-dbKL5Qsm2MQwOTtVQdOcKrrphcXAqDD80WLlSQrBLg+waDuuQ7H+TrvOT0thLKloNBlFUGnZZfXGRHINpih/0g== dependencies: - "@internationalized/date" "^3.5.6" - "@react-types/calendar" "^3.4.10" - "@react-types/overlays" "^3.8.10" - "@react-types/shared" "^3.25.0" + "@internationalized/date" "^3.6.0" + "@react-types/calendar" "^3.5.0" + "@react-types/overlays" "^3.8.11" + "@react-types/shared" "^3.26.0" -"@react-types/dialog@^3.5.13": - version "3.5.13" - resolved "https://registry.yarnpkg.com/@react-types/dialog/-/dialog-3.5.13.tgz#7100f9d5a25626cea2d2d8a755f4c0aa625faa68" - integrity sha512-9k8daVcAqQsySkzDY6NIVlyGxtpEip4TKuLyzAehthbv78GQardD5fHdjQ6eXPRS4I2qZrmytrFFrlOnwWVGHw== +"@react-types/dialog@^3.5.14": + version "3.5.14" + resolved 
"https://registry.yarnpkg.com/@react-types/dialog/-/dialog-3.5.14.tgz#97e568dc38ede4312ba6a12eda855c5e32c7cd47" + integrity sha512-OXWMjrALwrlgw8aHD8SeRm/s3tbAssdaEh2h73KUSeFau3fU3n5mfKv+WnFqsEaOtN261o48l7hTlS6615H9AA== dependencies: - "@react-types/overlays" "^3.8.10" - "@react-types/shared" "^3.25.0" + "@react-types/overlays" "^3.8.11" + "@react-types/shared" "^3.26.0" -"@react-types/grid@^3.2.9": - version "3.2.9" - resolved "https://registry.yarnpkg.com/@react-types/grid/-/grid-3.2.9.tgz#2a3e7b78cdca2df60e408b6f4f2bc6173ac98a0e" - integrity sha512-eMw0d2UIZ4QTzGgD1wGGPw0cv67KjAOCp4TcwWjgDV7Wa5SVV/UvOmpnIVDyfhkG/4KRI5OR9h+isy76B726qA== +"@react-types/grid@^3.2.10": + version "3.2.10" + resolved "https://registry.yarnpkg.com/@react-types/grid/-/grid-3.2.10.tgz#d2d1d124ed9472e3dedc48e91c941a7ad23bdc83" + integrity sha512-Z5cG0ITwqjUE4kWyU5/7VqiPl4wqMJ7kG/ZP7poAnLmwRsR8Ai0ceVn+qzp5nTA19cgURi8t3LsXn3Ar1FBoog== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/link@^3.5.8": - version "3.5.8" - resolved "https://registry.yarnpkg.com/@react-types/link/-/link-3.5.8.tgz#6a21ec7999cc1a5dbc71456b0739e27054114c3a" - integrity sha512-l/YGXddgAbLnIT7ekftXrK1D4n8NlLQwx0d4usyZpaxP1KwPzuwng20DxynamLc1atoKBqbUtZAnz32pe7vYgw== +"@react-types/link@^3.5.9": + version "3.5.9" + resolved "https://registry.yarnpkg.com/@react-types/link/-/link-3.5.9.tgz#bf61ff2780581de03920e6e43260844a81a38d2f" + integrity sha512-JcKDiDMqrq/5Vpn+BdWQEuXit4KN4HR/EgIi3yKnNbYkLzxBoeQZpQgvTaC7NEQeZnSqkyXQo3/vMUeX/ZNIKw== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/listbox@^3.5.2": - version "3.5.2" - resolved "https://registry.yarnpkg.com/@react-types/listbox/-/listbox-3.5.2.tgz#4a807338574e0560d0411b6ce60661d4ca74be6b" - integrity sha512-ML/Bt/MeO0FiixcuFQ+smpu1WguxTOqHDjSnhc1vcNxVQFWQOhyVy01LAY2J/T9TjfjyYGD41vyMTI0f6fcLEQ== +"@react-types/listbox@^3.5.3": + version "3.5.3" + resolved 
"https://registry.yarnpkg.com/@react-types/listbox/-/listbox-3.5.3.tgz#c5dbe8a67d67ca59de6b88aaa2f20fcf61fee1c5" + integrity sha512-v1QXd9/XU3CCKr2Vgs7WLcTr6VMBur7CrxHhWZQQFExsf9bgJ/3wbUdjy4aThY/GsYHiaS38EKucCZFr1QAfqA== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/menu@^3.9.12": - version "3.9.12" - resolved "https://registry.yarnpkg.com/@react-types/menu/-/menu-3.9.12.tgz#d8dd7ec5bdc4435db463cf56e79219a0f4a1a667" - integrity sha512-1SPnkHKJdvOfwv9fEgK1DI6DYRs4D3hW2XcWlLhVXSjaC68CzOHGwFhKIKvZiDTW/11L770PRSEloIxHR09uFQ== +"@react-types/menu@^3.9.13": + version "3.9.13" + resolved "https://registry.yarnpkg.com/@react-types/menu/-/menu-3.9.13.tgz#a666c2233cbdb495202586df86a798601788f74d" + integrity sha512-7SuX6E2tDsqQ+HQdSvIda1ji/+ujmR86dtS9CUu5yWX91P25ufRjZ72EvLRqClWNQsj1Xl4+2zBDLWlceznAjw== dependencies: - "@react-types/overlays" "^3.8.10" - "@react-types/shared" "^3.25.0" + "@react-types/overlays" "^3.8.11" + "@react-types/shared" "^3.26.0" -"@react-types/meter@^3.4.4": - version "3.4.4" - resolved "https://registry.yarnpkg.com/@react-types/meter/-/meter-3.4.4.tgz#e554eafa4583883b1f29bdc42a54e59c1a72e468" - integrity sha512-0SEmPkShByC1gYkW7l+iJPg8QfEe2VrgwTciAtTfC4KIqAYmJVQtq6L+4d72EMxOh8RpQHePaY/RFHEJXAh72A== +"@react-types/meter@^3.4.5": + version "3.4.5" + resolved "https://registry.yarnpkg.com/@react-types/meter/-/meter-3.4.5.tgz#e06d4a2fabd24989c73541b032123c5de495b613" + integrity sha512-04w1lEtvP/c3Ep8ND8hhH2rwjz2MtQ8o8SNLhahen3u0rX3jKOgD4BvHujsyvXXTMjj1Djp74sGzNawb4Ppi9w== dependencies: - "@react-types/progress" "^3.5.7" + "@react-types/progress" "^3.5.8" -"@react-types/numberfield@^3.8.6": - version "3.8.6" - resolved "https://registry.yarnpkg.com/@react-types/numberfield/-/numberfield-3.8.6.tgz#58bf9d5157f5c217a198a284a11382fb5bbb737f" - integrity sha512-VtWEMAXUO1S9EEZI8whc7xv6DVccxhbWsRthMCg/LxiwU3U5KAveadNc2c5rtXkRpd3cnD5xFzz3dExXdmHkAg== +"@react-types/numberfield@^3.8.7": + version "3.8.7" + 
resolved "https://registry.yarnpkg.com/@react-types/numberfield/-/numberfield-3.8.7.tgz#5a697fdf1bc405dbf55dafed713d47ed79f8e54b" + integrity sha512-KccMPi39cLoVkB2T0V7HW6nsxQVAwt89WWCltPZJVGzsebv/k0xTQlPVAgrUake4kDLoE687e3Fr/Oe3+1bDhw== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/overlays@^3.8.10": - version "3.8.10" - resolved "https://registry.yarnpkg.com/@react-types/overlays/-/overlays-3.8.10.tgz#9315b7d376f2877ef531cb42217327833eabcd15" - integrity sha512-IcnB+VYfAJazRjWhBKZTmVMh3KTp/B1rRbcKkPx6t8djP9UQhKcohP7lAALxjJ56Jjz/GFC6rWyUcnYH0NFVRA== +"@react-types/overlays@^3.8.11": + version "3.8.11" + resolved "https://registry.yarnpkg.com/@react-types/overlays/-/overlays-3.8.11.tgz#313964703b2a23572138120b619d35da33445dfd" + integrity sha512-aw7T0rwVI3EuyG5AOaEIk8j7dZJQ9m34XAztXJVZ/W2+4pDDkLDbJ/EAPnuo2xGYRGhowuNDn4tDju01eHYi+w== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/progress@^3.5.7": - version "3.5.7" - resolved "https://registry.yarnpkg.com/@react-types/progress/-/progress-3.5.7.tgz#adba7b5a15163f9c523a00a681513aa37e688314" - integrity sha512-EqMDHmlpoZUZzTjdejGIkSM0pS2LBI9NdadHf3bDNTycHv+5L1xpMHUg8RGOW8a3sRVLRvfN1aO9l75QZkyj+w== +"@react-types/progress@^3.5.8": + version "3.5.8" + resolved "https://registry.yarnpkg.com/@react-types/progress/-/progress-3.5.8.tgz#62ce4207c7e8d640b794c6d89063ce21bdb5970d" + integrity sha512-PR0rN5mWevfblR/zs30NdZr+82Gka/ba7UHmYOW9/lkKlWeD7PHgl1iacpd/3zl/jUF22evAQbBHmk1mS6Mpqw== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/radio@^3.8.4": - version "3.8.4" - resolved "https://registry.yarnpkg.com/@react-types/radio/-/radio-3.8.4.tgz#5133d4cd666f20d2449941ca88abe6e56a5c07ff" - integrity sha512-GCuOwQL19iwKa74NAIk9hv4ivyI8oW1+ZCuc2fzyDdeQjzTIlv3qrIyShwpVy1IoI7/4DYTMZm/YXPoKhu5TTA== +"@react-types/radio@^3.8.5": + version "3.8.5" + resolved 
"https://registry.yarnpkg.com/@react-types/radio/-/radio-3.8.5.tgz#8e2dd1911fba829b7f1ebb40bccf9ca483f021fc" + integrity sha512-gSImTPid6rsbJmwCkTliBIU/npYgJHOFaI3PNJo7Y0QTAnFelCtYeFtBiWrFodSArSv7ASqpLLUEj9hZu/rxIg== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/searchfield@^3.5.9": - version "3.5.9" - resolved "https://registry.yarnpkg.com/@react-types/searchfield/-/searchfield-3.5.9.tgz#c7cd9902fd9057baf1eb156dc21e1d7436187ae2" - integrity sha512-c/x8BWpH1Zq+fWpeBtzw2AhQhGi7ahWPicV7PlnqwIGO0MrH/QCjX0dj+I+1xpcAh8Eq6ECa79HE74Rw6aJmFg== +"@react-types/searchfield@^3.5.10": + version "3.5.10" + resolved "https://registry.yarnpkg.com/@react-types/searchfield/-/searchfield-3.5.10.tgz#015a42bf8417618b848035b88f0aba98572beceb" + integrity sha512-7wW4pJzbReawoGPu8a4l+CODTCDN088EN/ysUzl622ewim57PjArjix+lpO4+aEtJqS9HKpq8UEbjwo9axpcUA== dependencies: - "@react-types/shared" "^3.25.0" - "@react-types/textfield" "^3.9.7" + "@react-types/shared" "^3.26.0" + "@react-types/textfield" "^3.10.0" -"@react-types/select@^3.9.7": - version "3.9.7" - resolved "https://registry.yarnpkg.com/@react-types/select/-/select-3.9.7.tgz#d339cca27414c4f6e0bb5c9501a20d0c6249df42" - integrity sha512-Jva4ixfB4EEdy+WmZkUoLiQI7vVfHPxM73VuL7XDxvAO+YKiIztDTcU720QVNhxTMmQvCxfRBXWar8aodCjLiw== +"@react-types/select@^3.9.8": + version "3.9.8" + resolved "https://registry.yarnpkg.com/@react-types/select/-/select-3.9.8.tgz#2443b82549b65821f85876a5b803e6d04ae6343e" + integrity sha512-RGsYj2oFjXpLnfcvWMBQnkcDuKkwT43xwYWZGI214/gp/B64tJiIUgTM5wFTRAeGDX23EePkhCQF+9ctnqFd6g== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/shared@^3.25.0": - version "3.25.0" - resolved "https://registry.yarnpkg.com/@react-types/shared/-/shared-3.25.0.tgz#7223baf72256e918a3c29081bb1ecc6fad4fbf58" - integrity sha512-OZSyhzU6vTdW3eV/mz5i6hQwQUhkRs7xwY2d1aqPvTdMe0+2cY7Fwp45PAiwYLEj73i9ro2FxF9qC4DvHGSCgQ== +"@react-types/shared@^3.25.0", 
"@react-types/shared@^3.26.0": + version "3.26.0" + resolved "https://registry.yarnpkg.com/@react-types/shared/-/shared-3.26.0.tgz#21a8b579f0097ee78de18e3e580421ced89e4c8c" + integrity sha512-6FuPqvhmjjlpEDLTiYx29IJCbCNWPlsyO+ZUmCUXzhUv2ttShOXfw8CmeHWHftT/b2KweAWuzqSlfeXPR76jpw== -"@react-types/slider@^3.7.6": - version "3.7.6" - resolved "https://registry.yarnpkg.com/@react-types/slider/-/slider-3.7.6.tgz#4196b2bc98902b36de6de31a748d04b76770d228" - integrity sha512-z72wnEzSge6qTD9TUoUPp1A4j4jXk/MVii6rGE78XeE/Pq7HyyjU5bCagryMr9PC9MKa/oTiHcshKqWBDf57GA== +"@react-types/slider@^3.7.7": + version "3.7.7" + resolved "https://registry.yarnpkg.com/@react-types/slider/-/slider-3.7.7.tgz#f00450c6268665ff2ad38ad69bdf51d84ff2341a" + integrity sha512-lYTR9zXQV2fSEm/G3gwDENWiki1IXd/oorsgf0zu1DBi2SQDbOsLsGUXiwvD24Xy6OkUuhAqjLPPexezo7+u9g== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/switch@^3.5.6": - version "3.5.6" - resolved "https://registry.yarnpkg.com/@react-types/switch/-/switch-3.5.6.tgz#e18dcce0298ab52b70c9cdcdfe5e16224e7a248d" - integrity sha512-gJ8t2yTCgcitz4ON4ELcLLmtlDkn2MUjjfu3ez/cwA1X/NUluPYkhXj5Z6H+KOlnveqrKCZDRoTgK74cQ6Cvfg== +"@react-types/switch@^3.5.7": + version "3.5.7" + resolved "https://registry.yarnpkg.com/@react-types/switch/-/switch-3.5.7.tgz#f3fb589fce5819ca1c15f12479bf348e053adc27" + integrity sha512-1IKiq510rPTHumEZuhxuazuXBa2Cuxz6wBIlwf3NCVmgWEvU+uk1ETG0sH2yymjwCqhtJDKXi+qi9HSgPEDwAg== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/table@^3.10.2": - version "3.10.2" - resolved "https://registry.yarnpkg.com/@react-types/table/-/table-3.10.2.tgz#5e0be00eb61899ac6c9c322c6bad9cf94cbe157b" - integrity sha512-YzA4hcsYfnFFpA2UyGb1KKhLpWgaj5daApqjp126tCIosl8k1KxZmhKD50cwH0Jm19lALJseqo5VdlcJtcr4qg== +"@react-types/table@^3.10.3": + version "3.10.3" + resolved 
"https://registry.yarnpkg.com/@react-types/table/-/table-3.10.3.tgz#33959348641500e406abe330074f84b0c75ae4ac" + integrity sha512-Ac+W+m/zgRzlTU8Z2GEg26HkuJFswF9S6w26r+R3MHwr8z2duGPvv37XRtE1yf3dbpRBgHEAO141xqS2TqGwNg== dependencies: - "@react-types/grid" "^3.2.9" - "@react-types/shared" "^3.25.0" + "@react-types/grid" "^3.2.10" + "@react-types/shared" "^3.26.0" -"@react-types/tabs@^3.3.10": - version "3.3.10" - resolved "https://registry.yarnpkg.com/@react-types/tabs/-/tabs-3.3.10.tgz#98270b972b48e2272740391aaba033c620dec07e" - integrity sha512-s/Bw/HCIdWJPBw4O703ghKqhjGsIerRMIDxA88hbQYzfTDD6bkFDjCnsP2Tyy1G8Dg2rSPFUEE+k+PpLzqeEfQ== +"@react-types/tabs@^3.3.11": + version "3.3.11" + resolved "https://registry.yarnpkg.com/@react-types/tabs/-/tabs-3.3.11.tgz#b7db710ce2ca42a4e72cd2a581070212d2b07793" + integrity sha512-BjF2TqBhZaIcC4lc82R5pDJd1F7kstj1K0Nokhz99AGYn8C0ITdp6lR+DPVY9JZRxKgP9R2EKfWGI90Lo7NQdA== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/textfield@^3.9.7": - version "3.9.7" - resolved "https://registry.yarnpkg.com/@react-types/textfield/-/textfield-3.9.7.tgz#79b0bd2286dbf8ba39233279d1d7a3a3575ad553" - integrity sha512-vU5+QCOF9HgWGjAmmy+cpJibVW5voFomC5POmYHokm7kivYcMMjlonsgWwg/0xXrqE2qosH3tpz4jFoEuig1NQ== +"@react-types/textfield@^3.10.0": + version "3.10.0" + resolved "https://registry.yarnpkg.com/@react-types/textfield/-/textfield-3.10.0.tgz#10df39b75334174490a539ecae71ad19f5ea074d" + integrity sha512-ShU3d6kLJGQjPXccVFjM3KOXdj3uyhYROqH9YgSIEVxgA9W6LRflvk/IVBamD9pJYTPbwmVzuP0wQkTDupfZ1w== dependencies: - "@react-types/shared" "^3.25.0" + "@react-types/shared" "^3.26.0" -"@react-types/tooltip@^3.4.12": - version "3.4.12" - resolved "https://registry.yarnpkg.com/@react-types/tooltip/-/tooltip-3.4.12.tgz#70f5f1552a8c68fc21e5c400481c0f7fedc5ce30" - integrity sha512-FwsdSQ3UDIDORanQMGMLyzSUabw4AkKhwcRdPv4d5OT8GmJr7mBdZynfcsrKLJ0fzskIypMqspoutZidsI0MQg== +"@react-types/tooltip@^3.4.13": + version "3.4.13" + 
resolved "https://registry.yarnpkg.com/@react-types/tooltip/-/tooltip-3.4.13.tgz#f73fdc5c56790b7bd7c0d5382d0c758bd659e9d7" + integrity sha512-KPekFC17RTT8kZlk7ZYubueZnfsGTDOpLw7itzolKOXGddTXsrJGBzSB4Bb060PBVllaDO0MOrhPap8OmrIl1Q== dependencies: - "@react-types/overlays" "^3.8.10" - "@react-types/shared" "^3.25.0" + "@react-types/overlays" "^3.8.11" + "@react-types/shared" "^3.26.0" "@remix-run/router@1.19.0": version "1.19.0" From 6b3c9c99a139d1b0ab2d1bb2082be78cfa448d30 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Mon, 2 Dec 2024 14:50:21 +0530 Subject: [PATCH 273/292] enable_communities flag removed in backend (#906) From 47a2db7b7cb55732f211b5d4a372da885f84cb6d Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:09:22 +0000 Subject: [PATCH 274/292] added multimode metrics for json and fix for gemini ground truth metrics --- frontend/src/components/ChatBot/Chatbot.tsx | 13 ++++---- .../components/ChatBot/MetricsCheckbox.tsx | 5 ++- .../src/components/ChatBot/MetricsTab.tsx | 7 ++++- .../components/ChatBot/MultiModeMetrics.tsx | 31 ++++++++++++++++--- .../components/ChatBot/NotAvailableMetric.tsx | 20 ++++++++++++ .../Deduplication/index.tsx | 4 +-- .../SelectedJobList.tsx | 4 +-- frontend/src/utils/Constants.ts | 12 +++++++ frontend/src/utils/Utils.ts | 2 +- 9 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 frontend/src/components/ChatBot/NotAvailableMetric.tsx diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 8f2a72057..aa84764bc 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? 
{ ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg + : msg) ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg + : msg) ) ); } @@ -601,6 +601,7 @@ const Chatbot: FC = (props) => { nodes, tokensUsed, model, + multiModelMetrics, }, downloadLinkRef, 'graph-builder-chat-details.json' diff --git a/frontend/src/components/ChatBot/MetricsCheckbox.tsx b/frontend/src/components/ChatBot/MetricsCheckbox.tsx index 593468c2b..5b7fe89c0 100644 --- a/frontend/src/components/ChatBot/MetricsCheckbox.tsx +++ b/frontend/src/components/ChatBot/MetricsCheckbox.tsx @@ -3,13 +3,16 @@ import { Checkbox } from '@neo4j-ndl/react'; function MetricsCheckbox({ enableReference, toggleReferenceVisibility, + isDisabled = false, }: { enableReference: boolean; toggleReferenceVisibility: React.DispatchWithoutAction; + isDisabled?: boolean; }) { return ( diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index 33479eb13..b39292d94 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ 
b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -12,6 +12,7 @@ import { capitalize } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; import { metricsinfo } from '../../utils/Constants'; +import NotAvailableMetric from './NotAvailableMetric'; function MetricsTab({ metricsLoading, metricDetails, @@ -65,7 +66,11 @@ function MetricsTab({ columnHelper.accessor((row) => row.score as number, { id: 'Score', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, }), ], diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index c97d917b2..8bd89f1d8 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -11,6 +11,7 @@ import { Banner, Box, DataGrid, DataGridComponents, Flex, IconButton, Popover, T import { multimodelmetric } from '../../types'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import NotAvailableMetric from './NotAvailableMetric'; export default function MultiModeMetrics({ data, @@ -50,7 +51,11 @@ export default function MultiModeMetrics({ columnHelper.accessor((row) => row.answer_relevancy as number, { id: 'Answer Relevancy', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, header: () => ( @@ -73,7 +78,11 @@ export default function MultiModeMetrics({ columnHelper.accessor((row) => row.faithfulness as number, { id: 'Faithfullness', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 
'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, header: () => ( @@ -96,7 +105,11 @@ export default function MultiModeMetrics({ columnHelper.accessor((row) => row.context_entity_recall_score as number, { id: 'Entity Recall Score', cell: (info) => { - return {info.getValue()?.toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, header: () => ( @@ -119,7 +132,11 @@ export default function MultiModeMetrics({ columnHelper.accessor((row) => row.semantic_score as number, { id: 'Semantic Score', cell: (info) => { - return {info.getValue()?.toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, header: () => ( @@ -142,7 +159,11 @@ export default function MultiModeMetrics({ columnHelper.accessor((row) => row.rouge_score as number, { id: 'Rouge Score', cell: (info) => { - return {info.getValue()?.toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, header: () => ( diff --git a/frontend/src/components/ChatBot/NotAvailableMetric.tsx b/frontend/src/components/ChatBot/NotAvailableMetric.tsx new file mode 100644 index 000000000..f8df4c3c4 --- /dev/null +++ b/frontend/src/components/ChatBot/NotAvailableMetric.tsx @@ -0,0 +1,20 @@ +import { Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; + +export default function NotAvailableMetric() { + return ( + + N.A + + + + + + + + Some metrics are not available for Gemini model. 
+ + + + ); +} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index df30c25c1..d59b46e21 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index fc5f39206..447b1f846 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - isGdsActive + (isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities'), + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 7cf35f506..692d0faef 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -47,6 +47,18 @@ export const supportedLLmsForRagas = [ 'fireworks_llama_v3_70b', 'bedrock_claude_3_5_sonnet', ]; +export const supportedLLmsForGroundTruthMetrics = [ + 'openai_gpt_3.5', + 'openai_gpt_4', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3_70b', + 'bedrock_claude_3_5_sonnet', +]; export const prodllms = process.env.VITE_LLM_MODELS_PROD?.trim() != '' ? (process.env.VITE_LLM_MODELS_PROD?.split(',') as string[]) diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 82217535d..7314c55e7 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - prev === batchSize ? batchSize - 1 : prev + 1; + (prev === batchSize ? 
batchSize - 1 : prev + 1); export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From 40ac6288cb2b2a9dacf9092e11a721de80142a5b Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:49:47 +0530 Subject: [PATCH 275/292] Enhancement default model selection (#911) * LLM dropdown default value changes * merge dev * removing the stored default model --- frontend/src/components/Content.tsx | 5 +++-- frontend/src/components/Dropdown.tsx | 4 ++++ frontend/src/context/UsersFiles.tsx | 5 +++-- frontend/src/utils/Constants.ts | 5 ----- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 0f4d03c8c..932557dd3 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -13,7 +13,6 @@ import { batchSize, buttonCaptions, chatModeLables, - defaultLLM, largeFileSize, llms, RETRY_OPIONS, @@ -90,6 +89,7 @@ const Content: React.FC = ({ processedCount, setProcessedCount, setchatModes, + model } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( 'tableView' @@ -537,6 +537,7 @@ const Content: React.FC = ({ setProcessedCount(0); setConnectionStatus(false); localStorage.removeItem('password'); + localStorage.removeItem('selectedModel') setUserCredentials({ uri: '', password: '', userName: '', database: '' }); setSelectedNodes([]); setSelectedRels([]); @@ -881,7 +882,7 @@ const Content: React.FC = ({ onSelect={handleDropdownChange} options={llms ?? 
['']} placeholder='Select LLM Model' - defaultValue={defaultLLM} + defaultValue={model} view='ContentView' isDisabled={false} /> diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index a3ebd0d5a..2782addd9 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -17,6 +17,10 @@ const DropdownComponent: React.FC = ({ const isLargeDesktop = useMediaQuery(`(min-width:1440px )`); const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); + const existingModel = localStorage.getItem('selectedModel'); + if (existingModel != selectedOption?.value) { + localStorage.setItem('selectedModel', selectedOption?.value ?? ''); + } }; const allOptions = useMemo(() => options, [options]); return ( diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 25f031f95..68bc50bdf 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -6,7 +6,7 @@ import { OptionType, showTextFromSchemaDialogType, } from '../types'; -import { chatModeLables, defaultLLM, getStoredSchema } from '../utils/Constants'; +import { chatModeLables, getStoredSchema, llms } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; @@ -16,13 +16,14 @@ const FileContextProvider: FC = ({ children }) => { const selectedNodeLabelstr = localStorage.getItem('selectedNodeLabels'); const selectedNodeRelsstr = localStorage.getItem('selectedRelationshipLabels'); const persistedQueue = localStorage.getItem('waitingQueue'); + const selectedModel = localStorage.getItem('selectedModel'); const { userCredentials } = useCredentials(); const [files, setFiles] = useState<(File | null)[] | []>([]); const [filesData, setFilesData] = useState([]); const [queue, setQueue] = useState( new Queue(JSON.parse(persistedQueue ?? 
JSON.stringify({ queue: [] })).queue) ); - const [model, setModel] = useState(defaultLLM); + const [model, setModel] = useState(selectedModel ?? llms[0]); const [graphType, setGraphType] = useState('Knowledge Graph Entities'); const [selectedNodes, setSelectedNodes] = useState([]); const [selectedRels, setSelectedRels] = useState([]); diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 692d0faef..a80344209 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -28,11 +28,6 @@ export const llms = 'bedrock_claude_3_5_sonnet', ]; -export const defaultLLM = llms?.includes('openai_gpt_4o') - ? 'openai_gpt_4o' - : llms?.includes('gemini_1.5_pro') - ? 'gemini_1.5_pro' - : 'diffbot'; export const supportedLLmsForRagas = [ 'openai_gpt_3.5', 'openai_gpt_4', From 9d023a4c2631a0df89754d39709828db2ec5db22 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:57:57 +0530 Subject: [PATCH 276/292] Bug Fixing (#916) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the 
Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link 
for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * 
conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * markdown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-auth… * Update FileTable.tsx * merge changes * Update README.md * Update README.md * Update README.md * Update README.md * Graph visualization improvement and chunk text details (#860) * Update README.md * Dev to staging (#859) * processing count updated on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> 
* Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the 
file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: 
Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * 
conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * for… * enable communities removed and delete queries updated * nginx security headers * removed parameter to include chunk nodes as entity source * Ready to reprocess * communities fix * dependency fix Reprocess fix * icon size fix * z index fix * icon fix * icon gap fix * upload status * Checkbox fix * fix: table issue chat bot clear issue * new document deletion, 2nd level community deletion handled * clear history * graph info and delete query update * format changes * refresh changes * refresh changes * chat history changes * code refactor --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Jayanth T Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna --- frontend/src/components/ChatBot/Chatbot.tsx | 12 +- .../src/components/Graph/GraphViewModal.tsx | 32 ++--- frontend/src/components/Layout/PageLayout.tsx | 116 ++++++++---------- .../Deduplication/index.tsx | 4 +- .../SelectedJobList.tsx | 4 +- frontend/src/utils/Constants.ts | 2 +- frontend/src/utils/Utils.ts | 2 +- 7 files changed, 81 insertions(+), 91 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index aa84764bc..877dcdab4 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? 
{ ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg) + : msg ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg) + : msg ) ); } diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index 27610680d..b93cf4b98 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -58,15 +58,16 @@ const GraphViewModal: React.FunctionComponent = ({ const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); + const [mode, setMode] = useState(false); const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? 
queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -97,7 +98,12 @@ const GraphViewModal: React.FunctionComponent = ({ }, []); useEffect(() => { - const updateGraphType = graphTypeFromNodes(allNodes); + let updateGraphType; + if (mode) { + updateGraphType = graphTypeFromNodes(nodes); + } else { + updateGraphType = graphTypeFromNodes(allNodes); + } if (Array.isArray(updateGraphType)) { setGraphType(updateGraphType); } @@ -108,10 +114,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -260,11 +266,10 @@ const GraphViewModal: React.FunctionComponent = ({ const newGraphSelected = [...graphType]; if (currentIndex === -1) { newGraphSelected.push(graph); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); } else { newGraphSelected.splice(currentIndex, 1); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); } + initGraph(newGraphSelected, allNodes, allRelationships, scheme); setSearchQuery(''); setGraphType(newGraphSelected); setSelected(undefined); @@ -295,11 +300,8 @@ const GraphViewModal: React.FunctionComponent = ({ // Refresh the graph with nodes and relations if file is processing const handleRefresh = () => { setDisableRefresh(true); + setMode(true); graphApi('refreshMode'); - setGraphType(graphType); - setNodes(nodes); - setRelationships(relationships); - setScheme(newScheme); }; // when modal closes reset all states to default diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index dca910e43..71908bc64 100644 --- 
a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -14,14 +14,12 @@ import useSpeechSynthesis from '../../hooks/useSpeech'; import FallBackDialog from '../UI/FallBackDialog'; import { envConnectionAPI } from '../../services/ConnectAPI'; import { healthStatus } from '../../services/HealthStatus'; +import { useNavigate } from 'react-router'; const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); -interface PageLayoutProp { - isChatOnly?: boolean; -} -const PageLayout: React.FC = () => { +const PageLayout: React.FC = () => { const [openConnection, setOpenConnection] = useState({ openPopUp: false, chunksExists: false, @@ -38,6 +36,8 @@ const PageLayout: React.FC = () => { const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); + const navigate = useNavigate(); + const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); @@ -87,6 +87,7 @@ const PageLayout: React.FC = () => { const setUserCredentialsFromSession = (neo4jConnection: string) => { if (!neo4jConnection) { console.error('Invalid session data:', neo4jConnection); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); return; } try { @@ -138,21 +139,21 @@ const PageLayout: React.FC = () => { return false; } }; + // Handle case where session exists + let backendApiResponse; try { - // Handle case where session exists - if (session && isDev) { - let backendApiResponse; - try { - backendApiResponse = await envConnectionAPI(); - const connectionData = backendApiResponse.data; - const envCredentials = { - uri: connectionData.data.uri, - password: atob(connectionData.data.password), - userName: connectionData.data.user_name, - database: connectionData.data.database, - isReadonlyUser: !connectionData.data.write_access, - isGds: 
connectionData.data.gds_status, - }; + if (isDev) { + backendApiResponse = await envConnectionAPI(); + const connectionData = backendApiResponse.data; + const envCredentials = { + uri: connectionData.data.uri, + password: atob(connectionData.data.password), + userName: connectionData.data.user_name, + database: connectionData.data.database, + isReadonlyUser: !connectionData.data.write_access, + isGds: connectionData.data.gds_status, + }; + if (session && isDev) { const updated = updateSessionIfNeeded(envCredentials, session); if (!updated) { setUserCredentialsFromSession(session); // Using stored session if no update is needed @@ -160,65 +161,51 @@ const PageLayout: React.FC = () => { setConnectionStatus(Boolean(connectionData.data.graph_connection)); setIsBackendConnected(true); handleDisconnectButtonState(false); - } catch (error) { - console.error('Error in DEV session handling:', error); - handleDisconnectButtonState(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setErrorMessage(backendApiResponse?.data.error); } - } else { - // For PROD, picking the session values - setUserCredentialsFromSession(session as string); - setConnectionStatus(true); - setIsBackendConnected(true); - handleDisconnectButtonState(true); - return; - } - // Handle case where no session exists - if (isDev) { - let envAPiResponse; - try { - envAPiResponse = await envConnectionAPI(); - const connectionData = envAPiResponse.data.data; - const credentials = { - uri: connectionData.uri, - password: atob(connectionData.password), - userName: connectionData.user_name, - database: connectionData.database, - isReadonlyUser: !connectionData.write_access, - isGds: connectionData.gds_status, - }; - setUserCredentials(credentials); + else if (!session) { + setUserCredentials(envCredentials); localStorage.setItem( 'neo4j.connection', JSON.stringify({ - uri: credentials.uri, - user: credentials.userName, - password: btoa(credentials.password), - database: credentials.database, + 
uri: envCredentials.uri, + user: envCredentials.userName, + password: btoa(envCredentials.password), + database: envCredentials.database, userDbVectorIndex: 384, - isReadOnlyUser: credentials.isReadonlyUser, - isGDS: credentials.isGds, + isReadOnlyUser: envCredentials.isReadonlyUser, + isGDS: envCredentials.isGds, }) ); - setConnectionStatus(Boolean(connectionData.graph_connection)); - setIsBackendConnected(true); + setConnectionStatus(true); + setGdsActive(envCredentials.isGds); + setIsReadOnlyUser(envCredentials.isReadonlyUser); handleDisconnectButtonState(false); - } catch (error) { - console.error('Error in DEV no-session handling:', error); + } + } + else { + if (session && !isDev) { + // For PROD, picking the session values + setUserCredentialsFromSession(session as string); + setConnectionStatus(true); handleDisconnectButtonState(true); + return; + } + else { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setErrorMessage(envAPiResponse?.data.error); + handleDisconnectButtonState(true); } - } else { - // For PROD: Open modal to manually connect - handleDisconnectButtonState(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setIsBackendConnected(false); } } catch (error) { - console.error('Error in initializeConnection:', error); - setIsBackendConnected(false); + console.error('Error in DEV session handling:', error); + if (session) { + setUserCredentialsFromSession(session as string); + setConnectionStatus(true); + } + else { + setErrorMessage(backendApiResponse?.data.error); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + } + handleDisconnectButtonState(true); } } initializeConnection(); @@ -249,6 +236,7 @@ const PageLayout: React.FC = () => { currentMode: 'graph+vector+fulltext', }, ]); + navigate('.', { replace: true, state: null }); } } catch (error) { console.log(error); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx 
b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index d59b46e21..df30c25c1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 447b1f846..fc5f39206 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive + isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + : postProcessingTasks.filter((s) => s != 'enable_communities'), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index a80344209..6044cb748 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -302,7 +302,7 @@ export const graphLabels = { community: 'Communities', noNodesRels: 'No Nodes and No relationships', neighborView: 'neighborView', - chunksInfo: 'We are processing 50 chunks at a time', + chunksInfo: 'We are visualizing 50 chunks at a time', }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 7314c55e7..82217535d 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? 
batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From b85d31e5a8d67fad3de715ad7617540b99efd1ca Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Tue, 3 Dec 2024 18:05:33 +0530 Subject: [PATCH 277/292] Update documentation (#915) * Update backend_docs.adoc Documents updated for v6 * Update example.env --- backend/example.env | 1 + docs/backend/backend_docs.adoc | 73 ++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/backend/example.env b/backend/example.env index 7fa3cb480..7cf9b13ac 100644 --- a/backend/example.env +++ b/backend/example.env @@ -31,6 +31,7 @@ DUPLICATE_TEXT_DISTANCE = "" #examples LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" +LLM_MODEL_CONFIG_openai_gpt_4o="gpt-4o-2024-11-20,openai_api_key" LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 7f1fd11dc..058f88a58 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -978,6 +978,79 @@ The API responsible for a evaluating chatbot responses on the basis of different } } } +.... + +=== Evaluate response with ground truth +---- +POST /additional_metrics +---- + +The API responsible for evaluating chatbot responses on the basis of different metrics such as context entity recall, semantic score, rouge score. This requires additional ground truth to be supplied by the user. This utilises the RAGAS library to calculate these metrics.
+ +**API Parameters :** + +* `question`= User query for the chatbot +* `context`= context retrieved by retrieval mode used for answer generation +* `answer`= answer generated by chatbot +* `reference`= ground truth/ expected answer provided by user +* `model`= LLM model +* `mode`= Retrieval mode used for answer generation + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "graph_vector_fulltext": { + "rouge_score": 1.0, + "semantic_score": 0.9842, + "context_entity_recall_score": 0.5 + } + } +} +.... + +=== Fetch chunk text + +---- +POST /fetch_chunktext +---- + +The API responsible for fetching text associated with a particular chunk and chunk metadata. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `document_name` = Name of document for which chunks need to be fetched. +* `page no` = page number for multipage + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "pageitems": [ + { + "text": "By T. Albert Illustrated by: maaillustrations.com Science has never been so much fun. Here is all that a child needs to know about water, rain, hail, sleet and water cycle. When Professor Mois Ture teaches- little readers read, learn and ask for more….. Published by Monkey Pen Ltd Dear Supporter, Thank you for downloading our childrens books. Monkey Pens Vision is to provide thousands of free childrens books to young readers around the globe. Please share our books with your friends and family to support our mission. Thank you Please make a donation on Patreon to support Monkey Pens Free Book Project: Hi, I am Professor Mois Ture and I will be telling you about water. You can call it RAIN. You can call it SNOW. You can call it SLEET. You can call it HAIL. But it’s WATER all the same.
Did you ever wonder how", + "position": 1, + "pagenumber": 1 + }, + { + "text": " it HAIL. But it’s WATER all the same. Did you ever wonder how old water is or where it comes from? The answers may surprise you. The next time you see a pond or even a glass of water, think about how old that water might be. Do you really want to know ? I thought you did. Did you brush your teeth this morning? Well, some of the water that you used could have fallen from the sky yesterday, or a week, or month ago. It’s pretty new. But, some part of that water is very old and was around during the time of the dinosaurs, or even longer. Or maybe it’s a little newer; like from the time when the Pharaohs were building pyramids. You see there is only a limited amount of water and it gets recycled. Yep! It keeps going round and round. We call it the “Water Cycle.” Yes – You", + "position": 2, + "pagenumber": 2 + } + ], + "total_pages": 1 + }, + "message": "Total elapsed API time 0.48" +} + .... === Backend Database connection ---- From 978409094a3975df4a3c4cc1e036f4ffa2a37934 Mon Sep 17 00:00:00 2001 From: kaustubh-darekar Date: Tue, 3 Dec 2024 19:49:20 +0530 Subject: [PATCH 278/292] Updating langchain-neo4j package (#891) * Adding langchain-Neo4j package in application * langchain neo4j package updated, requirement.txt file updated with only necessary packages and their latest versions. unnecessary imports removed. * refactored code. 
replaced print statements with logging info * Updated sentence transformer to huggingface embeddings --- backend/requirements.txt | 206 ++++----------------- backend/score.py | 6 +- backend/src/QA_integration.py | 11 +- backend/src/create_chunks.py | 4 +- backend/src/diffbot_transformer.py | 3 +- backend/src/document_sources/gcs_bucket.py | 5 +- backend/src/document_sources/local_file.py | 2 - backend/src/document_sources/youtube.py | 37 ---- backend/src/graphDB_dataAccess.py | 7 +- backend/src/graph_query.py | 2 - backend/src/llm.py | 19 -- backend/src/main.py | 49 ++--- backend/src/make_relationships.py | 2 +- backend/src/post_processing.py | 2 +- backend/src/ragas_eval.py | 5 +- backend/src/shared/common_fn.py | 18 +- 16 files changed, 71 insertions(+), 307 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 5f0f5dc62..de1fc1136 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,183 +1,57 @@ -aiohttp==3.9.3 -aiosignal==1.3.1 -annotated-types==0.6.0 -antlr4-python3-runtime==4.9.3 -anyio==4.3.0 -async-timeout==4.0.3 asyncio==3.4.3 -attrs==23.2.0 -backoff==2.2.1 -beautifulsoup4==4.12.3 -boto3==1.34.140 -botocore==1.34.140 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 -coloredlogs==15.0.1 -contourpy==1.2.0 -cryptography==42.0.2 -cycler==0.12.1 -dataclasses-json==0.6.4 -dataclasses-json-speakeasy==0.5.11 -Deprecated==1.2.14 -distro==1.9.0 -docstring_parser==0.16 -effdet==0.4.1 -emoji==2.10.1 -exceptiongroup==1.2.0 -fastapi==0.111.0 +boto3==1.35.69 +botocore==1.35.69 +certifi==2024.8.30 +fastapi==0.115.5 fastapi-health==0.4.0 -filelock==3.13.1 -filetype==1.2.0 -flatbuffers==23.5.26 -fonttools==4.49.0 -frozenlist==1.4.1 -fsspec==2024.2.0 -google-api-core==2.18.0 -google-auth==2.29.0 -google_auth_oauthlib==1.2.0 -google-cloud-aiplatform==1.58.0 -google-cloud-bigquery==3.19.0 +google-api-core==2.23.0 +google-auth==2.36.0 +google_auth_oauthlib==1.2.1 
google-cloud-core==2.4.1 -google-cloud-resource-manager==1.12.3 -google-cloud-storage==2.17.0 -google-crc32c==1.5.0 -google-resumable-media==2.7.0 -googleapis-common-protos==1.63.0 -greenlet==3.0.3 -grpc-google-iam-v1==0.13.0 -grpcio==1.62.1 -google-ai-generativelanguage==0.6.6 -grpcio-status==1.62.1 -h11==0.14.0 -httpcore==1.0.4 -httpx==0.27.0 -huggingface-hub -humanfriendly==10.0 -idna==3.6 -importlib-resources==6.1.1 +json-repair==0.30.2 pip-install==1.3.5 -iopath==0.1.10 -Jinja2==3.1.3 -jmespath==1.0.1 -joblib==1.3.2 -jsonpatch==1.33 -jsonpath-python==1.0.6 -jsonpointer==2.4 -json-repair==0.25.2 -kiwisolver==1.4.5 -langchain==0.3.0 -langchain-aws==0.2.1 -langchain-anthropic==0.2.1 -langchain-fireworks==0.2.0 -langchain-google-genai==2.0.0 -langchain-community==0.3.0 -langchain-core==0.3.5 -langchain-experimental==0.3.1 -langchain-google-vertexai==2.0.1 -langchain-groq==0.2.0 -langchain-openai==0.2.0 -langchain-text-splitters==0.3.0 +langchain==0.3.8 +langchain-aws==0.2.7 +langchain-anthropic==0.3.0 +langchain-fireworks==0.2.5 +langchain-community==0.3.8 +langchain-core==0.3.21 +langchain-experimental==0.3.3 +langchain-google-vertexai==2.0.7 +langchain-groq==0.2.1 +langchain-openai==0.2.9 +langchain-text-splitters==0.3.2 +langchain-huggingface==0.1.2 langdetect==1.0.9 -langsmith==0.1.128 -layoutparser==0.3.4 +langsmith==0.1.146 langserve==0.3.0 -#langchain-cli==0.0.25 -lxml==5.1.0 -MarkupSafe==2.1.5 -marshmallow==3.20.2 -matplotlib==3.7.2 -mpmath==1.3.0 -multidict==6.0.5 -mypy-extensions==1.0.0 neo4j-rust-ext -networkx==3.2.1 -nltk==3.8.1 -numpy==1.26.4 -omegaconf==2.3.0 -onnx==1.16.1 -onnxruntime==1.18.1 -openai==1.47.1 -opencv-python==4.8.0.76 -orjson==3.9.15 -packaging==23.2 -pandas==2.2.0 -pdf2image==1.17.0 -pdfminer.six==20221105 -pdfplumber==0.10.4 -pikepdf==8.11.0 -pillow==10.2.0 -pillow_heif==0.15.0 -portalocker==2.8.2 -proto-plus==1.23.0 -protobuf==4.23.4 -psutil==6.0.0 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycocotools==2.0.7 -pycparser==2.21 
-pydantic==2.8.2 -pydantic_core==2.20.1 -pyparsing==3.0.9 -pypdf==4.0.1 -PyPDF2==3.0.1 -pypdfium2==4.27.0 -pytesseract==0.3.10 -python-dateutil==2.8.2 +nltk==3.9.1 +openai==1.55.1 +opencv-python==4.10.0.84 +psutil==6.1.0 +pydantic==2.9.0 python-dotenv==1.0.1 -python-iso639==2024.2.7 -python-magic==0.4.27 -python-multipart==0.0.9 -pytube==15.0.0 -pytz==2024.1 -PyYAML==6.0.1 -rapidfuzz==3.6.1 -regex==2023.12.25 -requests==2.32.3 -rsa==4.9 -s3transfer==0.10.1 -safetensors==0.4.1 -shapely==2.0.3 -six==1.16.0 -sniffio==1.3.1 -soupsieve==2.5 -starlette==0.37.2 -sse-starlette==2.1.2 +PyPDF2==3.0.1 +PyMuPDF==1.24.14 +starlette==0.41.3 +sse-starlette==2.1.3 starlette-session==0.4.3 -sympy==1.12 -tabulate==0.9.0 -tenacity==8.2.3 -tiktoken==0.7.0 -timm==0.9.12 -tokenizers==0.19 -tqdm==4.66.2 -transformers==4.42.3 -types-protobuf -types-requests -typing-inspect==0.9.0 -typing_extensions==4.12.2 -tzdata==2024.1 -unstructured==0.14.9 -unstructured-client==0.23.8 -unstructured-inference==0.7.36 -unstructured.pytesseract==0.3.12 -unstructured[all-docs]==0.14.9 +tqdm==4.67.1 +unstructured[all-docs]==0.16.6 urllib3==2.2.2 -uvicorn==0.30.1 -gunicorn==22.0.0 +uvicorn==0.32.1 +gunicorn==23.0.0 wikipedia==1.4.0 wrapt==1.16.0 yarl==1.9.4 -youtube-transcript-api==0.6.2 +youtube-transcript-api==0.6.3 zipp==3.17.0 -sentence-transformers==3.0.1 -google-cloud-logging==3.10.0 -PyMuPDF==1.24.5 +sentence-transformers==3.3.1 +google-cloud-logging==3.11.3 pypandoc==1.13 -graphdatascience==1.10 +graphdatascience==1.12 Secweb==1.11.0 -ragas==0.2.2 +ragas==0.2.6 rouge_score==0.1.2 +langchain-neo4j==0.1.1 \ No newline at end of file diff --git a/backend/score.py b/backend/score.py index 0c2884c25..62fb20b7b 100644 --- a/backend/score.py +++ b/backend/score.py @@ -36,6 +36,7 @@ from src.ragas_eval import * from starlette.types import ASGIApp, Message, Receive, Scope, Send import gzip +from langchain_neo4j import Neo4jGraph logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), 
"chunks") @@ -581,7 +582,6 @@ async def generate(): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - # print(f'Result of document status in SSE : {result}') if len(result) > 0: status = json.dumps({'fileName':file_name, 'status':result[0]['Status'], @@ -668,7 +668,7 @@ async def get_document_status(file_name, url, userName, password, database): } else: status = {'fileName':file_name, 'status':'Failed'} - print(f'Result of document status in refresh : {result}') + logging.info(f'Result of document status in refresh : {result}') return create_api_response('Success',message="",file_name=status) except Exception as e: message=f"Unable to get the document status" @@ -961,7 +961,7 @@ async def fetch_chunktext( async def backend_connection_configuation(): try: graph = Neo4jGraph() - print(f'login connection status of object: {graph}') + logging.info(f'login connection status of object: {graph}') if graph is not None: graph_connection = True isURI = os.getenv('NEO4J_URI') diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index fff7d7923..f50a36efb 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -4,15 +4,13 @@ import logging import threading -from concurrent.futures import ThreadPoolExecutor from datetime import datetime from typing import Any from dotenv import load_dotenv - -# LangChain imports -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain_community.chat_message_histories import Neo4jChatMessageHistory +from langchain_neo4j import Neo4jVector +from langchain_neo4j import Neo4jChatMessageHistory +from langchain_neo4j import GraphCypherQAChain from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableBranch 
@@ -21,7 +19,6 @@ from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage -from langchain.chains import GraphCypherQAChain from langchain_community.chat_message_histories import ChatMessageHistory from langchain_core.callbacks import StdOutCallbackHandler, BaseCallbackHandler @@ -38,8 +35,6 @@ from src.llm import get_llm from src.shared.common_fn import load_embedding_model from src.shared.constants import * -from src.graphDB_dataAccess import graphDBdataAccess -from src.ragas_eval import get_ragas_metrics load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') diff --git a/backend/src/create_chunks.py b/backend/src/create_chunks.py index 621785a31..d5e93d14f 100644 --- a/backend/src/create_chunks.py +++ b/backend/src/create_chunks.py @@ -1,8 +1,7 @@ from langchain_text_splitters import TokenTextSplitter from langchain.docstore.document import Document -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph import logging -import os from src.document_sources.youtube import get_chunks_with_timestamps, get_calculated_timestamps import re @@ -25,7 +24,6 @@ def split_file_into_chunks(self): A list of chunks each of which is a langchain Document. 
""" logging.info("Split file into smaller chunks") - # number_of_chunks_allowed = int(os.environ.get('NUMBER_OF_CHUNKS_ALLOWED')) text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20) if 'page' in self.pages[0].metadata: chunks = [] diff --git a/backend/src/diffbot_transformer.py b/backend/src/diffbot_transformer.py index a8e8db3fb..e16e54efb 100644 --- a/backend/src/diffbot_transformer.py +++ b/backend/src/diffbot_transformer.py @@ -1,5 +1,6 @@ from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -from langchain_community.graphs import Neo4jGraph +#from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain.docstore.document import Document from typing import List import os diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py index 91830f591..851692462 100644 --- a/backend/src/document_sources/gcs_bucket.py +++ b/backend/src/document_sources/gcs_bucket.py @@ -101,15 +101,12 @@ def merge_file_gcs(bucket_name, original_file_name: str, folder_name_sha1_hashed try: storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - # Retrieve chunks from GCS - # blobs = storage_client.list_blobs(bucket_name, prefix=folder_name_sha1_hashed) - # print(f'before sorted blobs: {blobs}') chunks = [] for i in range(1,total_chunks+1): blob_name = folder_name_sha1_hashed + '/' + f"{original_file_name}_part_{i}" blob = bucket.blob(blob_name) if blob.exists(): - print(f'Blob Name: {blob.name}') + logging.info(f'Blob Name: {blob.name}') chunks.append(blob.download_as_bytes()) blob.delete() diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py index ed46210f4..3d5bc08db 100644 --- a/backend/src/document_sources/local_file.py +++ b/backend/src/document_sources/local_file.py @@ -20,10 +20,8 @@ def load_document_content(file_path): if Path(file_path).suffix.lower() == '.pdf': - print("in 
if") return PyMuPDFLoader(file_path) else: - print("in else") return UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True) def get_documents_from_file_by_path(file_path,file_name): diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index dacda09f0..dee97e230 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -1,25 +1,16 @@ -from pathlib import Path from langchain.docstore.document import Document -from langchain_community.document_loaders import YoutubeLoader -from pytube import YouTube from youtube_transcript_api import YouTubeTranscriptApi import logging from urllib.parse import urlparse,parse_qs from difflib import SequenceMatcher from datetime import timedelta -from langchain_community.document_loaders.youtube import TranscriptFormat from src.shared.constants import YOUTUBE_CHUNK_SIZE_SECONDS from typing import List, Dict, Any import os import re -from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader def get_youtube_transcript(youtube_id): try: - #transcript = YouTubeTranscriptApi.get_transcript(youtube_id) - # transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) - # transcript = transcript_list.find_transcript(["en"]) - # transcript_pieces: List[Dict[str, Any]] = transcript.fetch() proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY") proxies = { 'https': proxy } transcript_pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies = proxies) @@ -28,15 +19,6 @@ def get_youtube_transcript(youtube_id): message = f"Youtube transcript is not available for youtube Id: {youtube_id}" raise Exception(message) -# def get_youtube_combined_transcript(youtube_id): -# try: -# transcript_dict = get_youtube_transcript(youtube_id) -# transcript = YouTubeTranscriptApi.get_transcript(youtube_id) -# return transcript -# except Exception as e: -# message = f"Youtube transcript is not available for youtube Id: 
{youtube_id}" -# raise Exception(message) - def get_youtube_combined_transcript(youtube_id): try: transcript_dict = get_youtube_transcript(youtube_id) @@ -64,25 +46,6 @@ def create_youtube_url(url): def get_documents_from_youtube(url): try: match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',url) - # youtube_loader = YoutubeLoader.from_youtube_url(url, - # language=["en-US", "en-gb", "en-ca", "en-au","zh-CN", "zh-Hans", "zh-TW", "fr-FR","de-DE","it-IT","ja-JP","pt-BR","ru-RU","es-ES"], - # translation = "en", - # add_video_info=True, - # transcript_format=TranscriptFormat.CHUNKS, - # chunk_size_seconds=YOUTUBE_CHUNK_SIZE_SECONDS) - # video_id = parse_qs(urlparse(url).query).get('v') - # cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") - # print(f'Credential file path on youtube.py {cred_path}') - # google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) - # youtube_loader_channel = GoogleApiYoutubeLoader( - # google_api_client=google_api_client, - # video_ids=[video_id[0].strip()], add_video_info=True - # ) - # youtube_transcript = youtube_loader_channel.load() - # pages = youtube_loader.load() - # print(f'youtube page_content: {youtube_transcript[0].page_content}') - # print(f'youtube id: {youtube_transcript[0].metadata["id"]}') - # print(f'youtube title: {youtube_transcript[0].metadata["snippet"]["title"]}') transcript= get_youtube_transcript(match.group(1)) transcript_content='' counter = YOUTUBE_CHUNK_SIZE_SECONDS diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 41a37631c..aa9034f4c 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -1,7 +1,6 @@ import logging import os -from datetime import datetime -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from src.shared.common_fn import create_gcs_bucket_folder_name_hashed, delete_uploaded_local_file, load_embedding_model from src.document_sources.gcs_bucket 
import delete_file_from_gcs from src.shared.constants import BUCKET_UPLOAD,NODEREL_COUNT_QUERY_WITH_COMMUNITY, NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY @@ -103,7 +102,7 @@ def update_source_node(self, obj_source_node:sourceNode): param= {"props":params} - print(f'Base Param value 1 : {param}') + logging.info(f'Base Param value 1 : {param}') query = "MERGE(d:Document {fileName :$props.fileName}) SET d += $props" logging.info("Update source node properties") self.graph.query(query,param) @@ -411,7 +410,7 @@ def get_duplicate_nodes_list(self): def merge_duplicate_nodes(self,duplicate_nodes_list): nodes_list = json.loads(duplicate_nodes_list) - print(f'Nodes list to merge {nodes_list}') + logging.info(f'Nodes list to merge {nodes_list}') query = """ UNWIND $rows AS row CALL { with row diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 86739ba6c..dc5a64a2c 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -207,8 +207,6 @@ def get_graph_results(uri, username, password,database,document_names): document_nodes = extract_node_elements(records) document_relationships = extract_relationships(records) - print(query) - logging.info(f"no of nodes : {len(document_nodes)}") logging.info(f"no of relations : {len(document_relationships)}") result = { diff --git a/backend/src/llm.py b/backend/src/llm.py index de50fa181..f19648ed6 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -6,17 +6,13 @@ from langchain_groq import ChatGroq from langchain_google_vertexai import HarmBlockThreshold, HarmCategory from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -import concurrent.futures -from concurrent.futures import ThreadPoolExecutor from langchain_experimental.graph_transformers import LLMGraphTransformer -from langchain_core.prompts import ChatPromptTemplate from langchain_anthropic import ChatAnthropic from langchain_fireworks import ChatFireworks from langchain_aws import ChatBedrock from 
langchain_community.chat_models import ChatOllama import boto3 import google.auth -from src.shared.constants import MODEL_VERSIONS, PROMPT_TO_ALL_LLMs def get_llm(model: str): """Retrieve the specified language model based on the model name.""" @@ -27,7 +23,6 @@ def get_llm(model: str): if "gemini" in model: model_name = env_value credentials, project_id = google.auth.default() - #model_name = MODEL_VERSIONS[model] llm = ChatVertexAI( model_name=model_name, #convert_system_message_to_human=True, @@ -43,7 +38,6 @@ def get_llm(model: str): }, ) elif "openai" in model: - #model_name = MODEL_VERSIONS[model] model_name, api_key = env_value.split(",") llm = ChatOpenAI( api_key=api_key, @@ -175,20 +169,7 @@ async def get_graph_document_list( allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, ignore_tool_usage=True, - #prompt = ChatPromptTemplate.from_messages(["system",PROMPT_TO_ALL_LLMs]) ) - # with ThreadPoolExecutor(max_workers=10) as executor: - # for chunk in combined_chunk_document_list: - # chunk_doc = Document( - # page_content=chunk.page_content.encode("utf-8"), metadata=chunk.metadata - # ) - # futures.append( - # executor.submit(llm_transformer.convert_to_graph_documents, [chunk_doc]) - # ) - - # for i, future in enumerate(concurrent.futures.as_completed(futures)): - # graph_document = future.result() - # graph_document_list.append(graph_document[0]) if isinstance(llm,DiffbotGraphTransformer): graph_document_list = llm_transformer.convert_to_graph_documents(combined_chunk_document_list) diff --git a/backend/src/main.py b/backend/src/main.py index c5add5343..b2c56caaa 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1,4 +1,4 @@ -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from src.shared.constants import (BUCKET_UPLOAD, PROJECT_ID, QUERY_TO_GET_CHUNKS, QUERY_TO_DELETE_EXISTING_ENTITIES, QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, @@ -8,7 +8,6 @@ 
DELETE_ENTITIES_AND_START_FROM_BEGINNING, QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT) from src.shared.schema_extraction import schema_extraction_from_text -from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader from dotenv import load_dotenv from datetime import datetime import logging @@ -27,7 +26,6 @@ import re from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader import warnings -from pytube import YouTube import sys import shutil import urllib.parse @@ -142,34 +140,13 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) logging.info(f"match value: {match}") - # file_path = os.path.join(os.path.dirname(__file__),"llm-experiments_credentials.json") - # logging.info(f'file path {file_path}') - - # if os.path.exists(file_path): - # logging.info("File path exist") - # with open(file_path,'r') as file: - # data = json.load(file) - # # logging.info(f"Project id : {data['project_id']}") - # # logging.info(f"Universal domain: {data['universe_domain']}") - # else: - # logging.warning("credntial file path not exist") - video_id = parse_qs(urlparse(youtube_url).query).get('v') - - # google_api_client = GoogleApiClient(service_account_path=Path(file_path)) - # youtube_loader_channel = GoogleApiYoutubeLoader( - # google_api_client=google_api_client, - # video_ids=[video_id[0].strip()], add_video_info=True - # ) - # youtube_transcript = youtube_loader_channel.load() - # page_content = youtube_transcript[0].page_content - - obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - #obj_source_node.file_name = YouTube(youtube_url).title + obj_source_node.file_name = match.group(1) transcript= get_youtube_combined_transcript(match.group(1)) - print(transcript) + logging.info(f"Youtube transcript : {transcript}") if transcript==None or 
len(transcript)==0: message = f"Youtube transcript is not available for : {obj_source_node.file_name}" + logging.info(f"Youtube transcript is not available for : {obj_source_node.file_name}") raise Exception(message) else: obj_source_node.file_size = sys.getsizeof(transcript) @@ -212,7 +189,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition): logging.info(f'Process file name :{fileName}') - if retry_condition is None: + if not retry_condition: gcs_file_cache = os.environ.get('GCS_FILE_CACHE') if gcs_file_cache == 'True': folder_name = create_gcs_bucket_folder_name_hashed(uri, fileName) @@ -226,7 +203,7 @@ async def extract_graph_from_file_local_file(uri, userName, password, database, return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition) async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: if(aws_access_key_id==None or aws_secret_access_key==None): raise Exception('Please provide AWS access and secret keys') else: @@ -240,9 +217,8 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_web_page(source_url) - if pages==None or len(pages)==0: 
raise Exception(f'Content is not available for given URL : {file_name}') return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) @@ -250,7 +226,7 @@ async def extract_graph_from_web_page(uri, userName, password, database, model, return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_youtube(source_url) if pages==None or len(pages)==0: @@ -260,7 +236,7 @@ async def extract_graph_from_file_youtube(uri, userName, password, database, mod return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_Wikipedia(wiki_query, language) if pages==None or len(pages)==0: raise Exception(f'Wikipedia page is not available for file : {file_name}') @@ -269,7 +245,7 @@ async def extract_graph_from_file_Wikipedia(uri, userName, password, database, m return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = 
get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or len(pages)==0: raise Exception(f'File content is not available for file : {file_name}') @@ -303,7 +279,6 @@ async def processing_source(uri, userName, password, database, model, file_name, logging.info(f'Time taken database connection: {elapsed_create_connection:.2f} seconds') uri_latency["create_connection"] = f'{elapsed_create_connection:.2f}' graphDb_data_Access = graphDBdataAccess(graph) - start_get_chunkId_chunkDoc_list = time.time() total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) end_get_chunkId_chunkDoc_list = time.time() @@ -499,12 +474,10 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, node_count += len(distinct_nodes) rel_count += len(relations) - print(f'node count internal func:{node_count}') - print(f'relation count internal func:{rel_count}') return node_count,rel_count,latency_processing_chunk def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): - if retry_condition is None: + if not retry_condition: logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] for i in range(0,len(pages)): diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 1ea2729e5..7d079fcf3 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -1,4 +1,4 @@ -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain.docstore.document import Document from src.shared.common_fn import load_embedding_model import logging diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 7746df3d0..02fc7fb06 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -1,7 +1,7 @@ from neo4j import GraphDatabase import logging import time -from langchain_community.graphs import 
Neo4jGraph +from langchain_neo4j import Neo4jGraph import os from src.shared.common_fn import load_embedding_model diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index 687db37aa..c9f447242 100644 --- a/backend/src/ragas_eval.py +++ b/backend/src/ragas_eval.py @@ -8,11 +8,8 @@ from ragas.metrics import answer_relevancy, faithfulness from src.shared.common_fn import load_embedding_model from ragas.dataset_schema import SingleTurnSample -from ragas.metrics import BleuScore, RougeScore, SemanticSimilarity, ContextEntityRecall -from ragas.metrics._factual_correctness import FactualCorrectness +from ragas.metrics import RougeScore, SemanticSimilarity, ContextEntityRecall from ragas.llms import LangchainLLMWrapper -from langchain_openai import ChatOpenAI -from langchain.embeddings import OpenAIEmbeddings from ragas.embeddings import LangchainEmbeddingsWrapper import nltk diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 916e01e89..0c0b4bea1 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -1,25 +1,15 @@ import hashlib import logging from src.document_sources.youtube import create_youtube_url -from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings from langchain_google_vertexai import VertexAIEmbeddings from langchain_openai import OpenAIEmbeddings -from langchain.docstore.document import Document -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain_community.graphs.graph_document import GraphDocument from typing import List import re import os from pathlib import Path -from langchain_openai import ChatOpenAI -from langchain_google_vertexai import ChatVertexAI -from langchain_groq import ChatGroq -from langchain_google_vertexai import HarmBlockThreshold, HarmCategory -from langchain_experimental.graph_transformers.diffbot import 
DiffbotGraphTransformer -# from neo4j.debug import watch - -# watch("neo4j") - def check_url_source(source_type, yt_url:str=None, wiki_query:str=None): language='' @@ -86,11 +76,11 @@ def load_embedding_model(embedding_model_name: str): dimension = 768 logging.info(f"Embedding: Using Vertex AI Embeddings , Dimension:{dimension}") else: - embeddings = SentenceTransformerEmbeddings( + embeddings = HuggingFaceEmbeddings( model_name="all-MiniLM-L6-v2"#, cache_folder="/embedding_model" ) dimension = 384 - logging.info(f"Embedding: Using SentenceTransformer , Dimension:{dimension}") + logging.info(f"Embedding: Using Langchain HuggingFaceEmbeddings , Dimension:{dimension}") return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): From b09131e281f06139b768e2bac5c2fa488e8d757d Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:10:27 +0530 Subject: [PATCH 279/292] gcs file existance check and reprocess from last processed position check updates (#917) Co-authored-by: kaustubh-darekar --- README.md | 12 +++++++++++- backend/src/document_sources/gcs_bucket.py | 17 ++++++++++++----- backend/src/main.py | 11 ++++++----- backend/src/shared/constants.py | 12 +----------- backend/src/shared/schema_extraction.py | 1 - 5 files changed, 30 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 164e51f00..e37d3ddfe 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,17 @@ Allow unauthenticated request : Yes | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | | RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | - +## LLMs Supported +1. OpenAI +2. Gemini +3. Azure OpenAI(dev) +4. Anthropic(dev) +5. Fireworks(dev) +6. Groq(dev) +7. Amazon Bedrock(dev) +8. Ollama(dev) +9. Diffbot +10. 
Other OpenAI compabtile baseurl models(dev) ## For local llms (Ollama) 1. Pull the docker imgage of ollama diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py index 851692462..3aaf42e12 100644 --- a/backend/src/document_sources/gcs_bucket.py +++ b/backend/src/document_sources/gcs_bucket.py @@ -59,8 +59,14 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g if access_token is None: storage_client = storage.Client(project=gcs_project_id) - loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=load_document_content) - pages = loader.load() + bucket = storage_client.bucket(gcs_bucket_name) + blob = bucket.blob(blob_name) + + if blob.exists(): + loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=load_document_content) + pages = loader.load() + else : + raise Exception('File does not exist, Please re-upload the file and try again.') else: creds= Credentials(access_token) storage_client = storage.Client(project=gcs_project_id, credentials=creds) @@ -77,7 +83,7 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g text += page.extract_text() pages = [Document(page_content = text)] else: - raise Exception('Blob Not Found') + raise Exception(f'File Not Found in GCS bucket - {gcs_bucket_name}') return gcs_blob_filename, pages def upload_file_to_gcs(file_chunk, chunk_number, original_file_name, bucket_name, folder_name_sha1_hashed): @@ -143,7 +149,8 @@ def copy_failed_file(source_bucket_name,dest_bucket_name,folder_name, file_name) dest_bucket = storage_client.bucket(dest_bucket_name) folder_file_name = folder_name +'/'+file_name source_blob = source_bucket.blob(folder_file_name) - source_bucket.copy_blob(source_blob, dest_bucket, file_name) - logging.info(f'Failed file {file_name} copied to {dest_bucket_name} from {source_bucket_name} in GCS successfully') + if 
source_blob.exists(): + source_bucket.copy_blob(source_blob, dest_bucket, file_name) + logging.info(f'Failed file {file_name} copied to {dest_bucket_name} from {source_bucket_name} in GCS successfully') except Exception as e: raise Exception(e) diff --git a/backend/src/main.py b/backend/src/main.py index b2c56caaa..5ef1e4354 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -497,8 +497,8 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): chunkId_chunkDoc_list=[] chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename":file_name}) - if chunks[0]['text'] is None or chunks[0]['text']=="" : - raise Exception(f"Chunks are not created for {file_name}. Please re-upload file and try.") + if chunks[0]['text'] is None or chunks[0]['text']=="" or not chunks : + raise Exception(f"Chunks are not created for {file_name}. Please re-upload file and try again.") else: for chunk in chunks: chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']}) @@ -507,15 +507,16 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): if retry_condition == START_FROM_LAST_PROCESSED_POSITION: logging.info(f"Retry : start_from_last_processed_position") starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name}) - if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): + + if starting_chunk and starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] - elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): + elif starting_chunk and starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name}) return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] else: - raise Exception(f"All chunks of {file_name} are alreday processed. 
If you want to re-process, Please start from begnning") + raise Exception(f"All chunks of file are alreday processed. If you want to re-process, Please start from begnning") else: logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}") diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index bf96532d9..8307dc3c9 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,14 +1,4 @@ -MODEL_VERSIONS = { - "openai-gpt-3.5": "gpt-3.5-turbo-0125", - "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-002", - "gemini-1.5-flash": "gemini-1.5-flash-002", - "openai-gpt-4": "gpt-4-turbo-2024-04-09", - "diffbot" : "gpt-4-turbo-2024-04-09", - "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", - "openai-gpt-4o":"gpt-4o-2024-08-06", - "groq-llama3" : "llama3-70b-8192" - } + OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"] GROQ_MODELS = ["groq-llama3"] diff --git a/backend/src/shared/schema_extraction.py b/backend/src/shared/schema_extraction.py index 80954ba65..1b7f76c92 100644 --- a/backend/src/shared/schema_extraction.py +++ b/backend/src/shared/schema_extraction.py @@ -2,7 +2,6 @@ #from langchain_core.pydantic_v1 import BaseModel, Field from pydantic.v1 import BaseModel, Field from src.llm import get_llm -from src.shared.constants import MODEL_VERSIONS from langchain_core.prompts import ChatPromptTemplate class Schema(BaseModel): From 6e9edce7e3ecafea11ef4455c0fe65cc97663051 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Wed, 4 Dec 2024 05:13:24 +0000 Subject: [PATCH 280/292] format and lint fixes --- frontend/src/components/BreakDownPopOver.tsx | 2 +- frontend/src/components/ChatBot/Chatbot.tsx | 12 ++++++------ frontend/src/components/Content.tsx | 4 ++-- .../src/components/Graph/GraphViewModal.tsx | 16 
++++++++-------- frontend/src/components/Layout/PageLayout.tsx | 17 +++++------------ .../Deduplication/index.tsx | 4 ++-- .../PostProcessingCheckList/SelectedJobList.tsx | 4 ++-- frontend/src/utils/Utils.ts | 2 +- 8 files changed, 27 insertions(+), 34 deletions(-) diff --git a/frontend/src/components/BreakDownPopOver.tsx b/frontend/src/components/BreakDownPopOver.tsx index 01ebc4f20..f6798bf97 100644 --- a/frontend/src/components/BreakDownPopOver.tsx +++ b/frontend/src/components/BreakDownPopOver.tsx @@ -11,7 +11,7 @@ export default function BreakDownPopOver({ file, isNodeCount = true }: { file: C - + } > diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 877dcdab4..aa84764bc 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? 
{ ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg + : msg) ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg + : msg) ) ); } diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 932557dd3..449a47387 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -89,7 +89,7 @@ const Content: React.FC = ({ processedCount, setProcessedCount, setchatModes, - model + model, } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( 'tableView' @@ -537,7 +537,7 @@ const Content: React.FC = ({ setProcessedCount(0); setConnectionStatus(false); localStorage.removeItem('password'); - localStorage.removeItem('selectedModel') + localStorage.removeItem('selectedModel'); setUserCredentials({ uri: '', password: '', userName: '', database: '' }); setSelectedNodes([]); setSelectedRels([]); diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index b93cf4b98..97a55144b 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -64,10 +64,10 @@ const GraphViewModal: React.FunctionComponent = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? 
queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -114,10 +114,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 71908bc64..88f5c7386 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -18,7 +18,6 @@ import { useNavigate } from 'react-router'; const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); - const PageLayout: React.FC = () => { const [openConnection, setOpenConnection] = useState({ openPopUp: false, @@ -161,8 +160,7 @@ const PageLayout: React.FC = () => { setConnectionStatus(Boolean(connectionData.data.graph_connection)); setIsBackendConnected(true); handleDisconnectButtonState(false); - } - else if (!session) { + } else if (!session) { setUserCredentials(envCredentials); localStorage.setItem( 'neo4j.connection', @@ -181,27 +179,22 @@ const PageLayout: React.FC = () => { setIsReadOnlyUser(envCredentials.isReadonlyUser); handleDisconnectButtonState(false); } - } - else { - if (session && !isDev) { + } else if (session && !isDev) { // For PROD, picking the session values setUserCredentialsFromSession(session as string); setConnectionStatus(true); handleDisconnectButtonState(true); - return; - } - else { + + } else { setOpenConnection((prev) => ({ ...prev, openPopUp: true })); handleDisconnectButtonState(true); } - } } catch (error) { console.error('Error in DEV session handling:', error); 
if (session) { setUserCredentialsFromSession(session as string); setConnectionStatus(true); - } - else { + } else { setErrorMessage(backendApiResponse?.data.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index df30c25c1..d59b46e21 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index fc5f39206..447b1f846 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - isGdsActive + (isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities'), + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 82217535d..7314c55e7 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - prev === batchSize ? batchSize - 1 : prev + 1; + (prev === batchSize ? batchSize - 1 : prev + 1); export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From a9420ae151a9e9ed2af82fcae9821323913ee823 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:08:53 +0530 Subject: [PATCH 281/292] updated requirements (#923) --- backend/requirements.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index de1fc1136..1df0ff4de 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -2,7 +2,7 @@ asyncio==3.4.3 boto3==1.35.69 botocore==1.35.69 certifi==2024.8.30 -fastapi==0.115.5 +fastapi==0.115.6 fastapi-health==0.4.0 google-api-core==2.23.0 google-auth==2.36.0 @@ -32,6 +32,7 @@ opencv-python==4.10.0.84 psutil==6.1.0 pydantic==2.9.0 python-dotenv==1.0.1 +python-magic==0.4.27 PyPDF2==3.0.1 PyMuPDF==1.24.14 starlette==0.41.3 @@ -39,6 +40,9 @@ sse-starlette==2.1.3 starlette-session==0.4.3 tqdm==4.67.1 unstructured[all-docs]==0.16.6 +unstructured==0.16.6 +unstructured-client==0.26.2 +unstructured-inference==0.8.1 urllib3==2.2.2 
uvicorn==0.32.1 gunicorn==23.0.0 From 8ce169306bddd287c663fabd1969b60e9e2bc8b5 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:29:47 +0530 Subject: [PATCH 282/292] Update Constants.ts --- frontend/src/utils/Constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 6044cb748..d83000eca 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -366,5 +366,5 @@ export const metricsinfo: Record = { answer_relevancy: "Determines How well the answer addresses the user's question.", rouge_score: 'Determines How much the generated answer matches the reference answer, word-for-word.', semantic_score: 'Determines How well the generated answer understands the meaning of the reference answer.', - context_entity_recall_score: 'Determines the recall of entities present in both reference and retrieved contexts', + context_entity_recall_score: 'Determines the recall of entities present in both generated answer and retrieved contexts', }; From cc3dc3d8e97affa00bfede1cb1683f22fd77ae59 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:59:06 +0530 Subject: [PATCH 283/292] Metric table issues (#921) * fixed the table issues * hiding context recall check * eval error with gemini resolved * context reacall metric fix --------- Co-authored-by: kaustubh-darekar --- backend/src/ragas_eval.py | 48 +++++++++++++------ .../components/ChatBot/ChatModesSwitch.tsx | 4 +- .../components/ChatBot/CommonChatActions.tsx | 8 +++- .../src/components/ChatBot/MetricsTab.tsx | 18 +------ .../components/ChatBot/MultiModeMetrics.tsx | 22 ++------- 5 files changed, 45 insertions(+), 55 deletions(-) diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index c9f447242..29f7e026c 100644 --- a/backend/src/ragas_eval.py +++ 
b/backend/src/ragas_eval.py @@ -5,7 +5,7 @@ from datasets import Dataset from dotenv import load_dotenv from ragas import evaluate -from ragas.metrics import answer_relevancy, faithfulness +from ragas.metrics import answer_relevancy, faithfulness,context_entity_recall from src.shared.common_fn import load_embedding_model from ragas.dataset_schema import SingleTurnSample from ragas.metrics import RougeScore, SemanticSimilarity, ContextEntityRecall @@ -24,25 +24,29 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str): try: start_time = time.time() dataset = Dataset.from_dict( - {"question": [question] * len(answer), "answer": answer, "contexts": [[ctx] for ctx in context]} + {"question": [question] * len(answer),"reference": answer, "answer": answer, "contexts": [[ctx] for ctx in context]} ) logging.info("Evaluation dataset created successfully.") if ("diffbot" in model) or ("ollama" in model): raise ValueError(f"Unsupported model for evaluation: {model}") + elif ("gemini" in model): + llm, model_name = get_llm(model=model) + llm = LangchainLLMWrapper(llm,is_finished_parser=custom_is_finished_parser) else: llm, model_name = get_llm(model=model) + llm = LangchainLLMWrapper(llm) logging.info(f"Evaluating with model: {model_name}") score = evaluate( dataset=dataset, - metrics=[faithfulness, answer_relevancy], + metrics=[faithfulness, answer_relevancy,context_entity_recall], llm=llm, embeddings=EMBEDDING_FUNCTION, ) score_dict = ( - score.to_pandas()[["faithfulness", "answer_relevancy"]] + score.to_pandas()[["faithfulness", "answer_relevancy","context_entity_recall"]] .fillna(0) .round(4) .to_dict(orient="list") @@ -67,13 +71,10 @@ async def get_additional_metrics(question: str, contexts: list, answers: list, r if ("diffbot" in model_name) or ("ollama" in model_name): raise ValueError(f"Unsupported model for evaluation: {model_name}") llm, model_name = get_llm(model=model_name) - ragas_llm = LangchainLLMWrapper(llm) embeddings = 
EMBEDDING_FUNCTION embedding_model = LangchainEmbeddingsWrapper(embeddings=embeddings) rouge_scorer = RougeScore() semantic_scorer = SemanticSimilarity() - entity_recall_scorer = ContextEntityRecall() - entity_recall_scorer.llm = ragas_llm semantic_scorer.embeddings = embedding_model metrics = [] for response, context in zip(answers, contexts): @@ -82,18 +83,35 @@ async def get_additional_metrics(question: str, contexts: list, answers: list, r rouge_score = round(rouge_score,4) semantic_score = await semantic_scorer.single_turn_ascore(sample) semantic_score = round(semantic_score, 4) - if "gemini" in model_name: - entity_recall_score = "Not Available" - else: - entity_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context]) - entity_recall_score = await entity_recall_scorer.single_turn_ascore(entity_sample) - entity_recall_score = round(entity_recall_score, 4) metrics.append({ "rouge_score": rouge_score, "semantic_score": semantic_score, - "context_entity_recall_score": entity_recall_score }) return metrics except Exception as e: logging.exception("Error in get_additional_metrics") - return {"error": str(e)} \ No newline at end of file + return {"error": str(e)} + + +def custom_is_finished_parser(response): + is_finished_list = [] + for g in response.flatten(): + resp = g.generations[0][0] + if resp.generation_info is not None: + if resp.generation_info.get("finish_reason") is not None: + is_finished_list.append( + resp.generation_info.get("finish_reason") == "STOP" + ) + + elif ( + isinstance(resp, ChatGeneration) + and t.cast(ChatGeneration, resp).message is not None + ): + resp_message: BaseMessage = t.cast(ChatGeneration, resp).message + if resp_message.response_metadata.get("finish_reason") is not None: + is_finished_list.append( + resp_message.response_metadata.get("finish_reason") == "STOP" + ) + else: + is_finished_list.append(True) + return all(is_finished_list) \ No newline at end of file diff --git 
a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index 5ff01a919..1327d5a7f 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -29,7 +29,7 @@ export default function ChatModesSwitch({ onClick={() => switchToOtherMode(currentModeIndex - 1)} ariaLabel='left' > - +
    switchToOtherMode(currentModeIndex + 1)} ariaLabel='right' > - + ); diff --git a/frontend/src/components/ChatBot/CommonChatActions.tsx b/frontend/src/components/ChatBot/CommonChatActions.tsx index d1a5c6243..9f7acc8b9 100644 --- a/frontend/src/components/ChatBot/CommonChatActions.tsx +++ b/frontend/src/components/ChatBot/CommonChatActions.tsx @@ -43,7 +43,7 @@ export default function CommonActions({ disabled={chat.isTyping || chat.isLoading} aria-label='copy text' > - + - {chat.speaking ? : } + {chat.speaking ? ( + + ) : ( + + )} ); diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index b39292d94..627c60ec3 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -119,23 +119,7 @@ function MetricsTab({ }} /> ), - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - - ); - }, + Navigation: null, }} isKeyboardNavigable={false} /> diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 8bd89f1d8..343664557 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -102,7 +102,7 @@ export default function MultiModeMetrics({ ), }), - columnHelper.accessor((row) => row.context_entity_recall_score as number, { + columnHelper.accessor((row) => row.context_entity_recall as number, { id: 'Entity Recall Score', cell: (info) => { const value = isNaN(info.getValue()) ? 
'N.A' : info.getValue()?.toFixed(2); @@ -201,7 +201,7 @@ export default function MultiModeMetrics({ }); useEffect(() => { if (isWithAdditionalMetrics === false) { - table.setColumnVisibility({ 'Recall Score': false, 'Semantic Score': false, 'Rouge Score': false }); + table.setColumnVisibility({ 'Semantic Score': false, 'Rouge Score': false }); } else { table.resetColumnVisibility(true); } @@ -235,23 +235,7 @@ export default function MultiModeMetrics({ }} /> ), - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - - ); - }, + Navigation: null, }} isKeyboardNavigable={false} /> From bad63890a61ffde89883dbfea928b05f2cc056d7 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 5 Dec 2024 08:50:02 +0000 Subject: [PATCH 284/292] elementid if no id is there --- .../DeleteTabForOrphanNodes/index.tsx | 6 +++--- frontend/src/utils/Constants.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index 5d23cc7c4..de8cdc186 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -122,13 +122,13 @@ export default function DeletePopUpForOrphanNodes({ return (
    handleOrphanNodeClick(info.row.id, 'chatInfoView'), - title: info.getValue(), + title: info.getValue() ? info.getValue() : info.row.id, }} > - {info.getValue()} + {info.getValue() ? info.getValue() : info.row.id}
    ); diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index d83000eca..2af5f9942 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -366,5 +366,5 @@ export const metricsinfo: Record = { answer_relevancy: "Determines How well the answer addresses the user's question.", rouge_score: 'Determines How much the generated answer matches the reference answer, word-for-word.', semantic_score: 'Determines How well the generated answer understands the meaning of the reference answer.', - context_entity_recall_score: 'Determines the recall of entities present in both generated answer and retrieved contexts', + context_entity_recall: 'Determines the recall of entities present in both generated answer and retrieved contexts', }; From 992fa9f2396eea400b263670fc9cc7f2906a94f4 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:06:32 +0000 Subject: [PATCH 285/292] backenapi for all env --- backend/score.py | 2 +- frontend/src/components/Layout/PageLayout.tsx | 90 ++++++++----------- 2 files changed, 40 insertions(+), 52 deletions(-) diff --git a/backend/score.py b/backend/score.py index cacbcc791..737a82afe 100644 --- a/backend/score.py +++ b/backend/score.py @@ -993,7 +993,7 @@ async def backend_connection_configuation(): message="Unable to connect backend DB" error_message = str(e) logging.exception(f'{error_message}') - return create_api_response(job_status, message=message, error=error_message + ' or fill from the login dialog', data=graph_connection) + return create_api_response(job_status, message=message, error=error_message.rstrip('.') + ', or fill from the login dialog.', data=graph_connection) finally: gc.collect() diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 12cbc9cbe..ce37c50fc 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ 
b/frontend/src/components/Layout/PageLayout.tsx @@ -63,12 +63,10 @@ const PageLayout: React.FC = () => { showDisconnectButton, } = useCredentials(); const { cancel } = useSpeechSynthesis(); - + useEffect(() => { async function initializeConnection() { const session = localStorage.getItem('neo4j.connection'); - const environment = process.env.VITE_ENV; - const isDev = environment === 'DEV'; // Fetch backend health status try { const response = await healthStatus(); @@ -137,63 +135,53 @@ const PageLayout: React.FC = () => { return false; } }; - // Handle case where session exists + // Handle connection initialization let backendApiResponse; try { - if (isDev) { - backendApiResponse = await envConnectionAPI(); - const connectionData = backendApiResponse.data; - const envCredentials = { - uri: connectionData.data.uri, - password: atob(connectionData.data.password), - userName: connectionData.data.user_name, - database: connectionData.data.database, - isReadonlyUser: !connectionData.data.write_access, - isGds: connectionData.data.gds_status, - }; - if (session && isDev) { - const updated = updateSessionIfNeeded(envCredentials, session); - if (!updated) { - setUserCredentialsFromSession(session); // Using stored session if no update is needed - } - setConnectionStatus(Boolean(connectionData.data.graph_connection)); - setIsBackendConnected(true); - handleDisconnectButtonState(false); - } else if (!session) { - setUserCredentials(envCredentials); - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - uri: envCredentials.uri, - user: envCredentials.userName, - password: btoa(envCredentials.password), - database: envCredentials.database, - userDbVectorIndex: 384, - isReadOnlyUser: envCredentials.isReadonlyUser, - isGDS: envCredentials.isGds, - }) - ); - setConnectionStatus(true); - setGdsActive(envCredentials.isGds); - setIsReadOnlyUser(envCredentials.isReadonlyUser); - handleDisconnectButtonState(false); + backendApiResponse = await envConnectionAPI(); + const 
connectionData = backendApiResponse.data; + const envCredentials = { + uri: connectionData.data.uri, + password: atob(connectionData.data.password), + userName: connectionData.data.user_name, + database: connectionData.data.database, + isReadonlyUser: !connectionData.data.write_access, + isGds: connectionData.data.gds_status, + }; + if (session) { + const updated = updateSessionIfNeeded(envCredentials, session); + if (!updated) { + setUserCredentialsFromSession(session); // Use stored session if no update is needed } - } else if (session && !isDev) { - // For PROD, picking the session values - setUserCredentialsFromSession(session as string); - setConnectionStatus(true); - handleDisconnectButtonState(true); + setConnectionStatus(Boolean(connectionData.data.graph_connection)); + setIsBackendConnected(true); + handleDisconnectButtonState(false); } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - handleDisconnectButtonState(true); + setUserCredentials(envCredentials); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: envCredentials.uri, + user: envCredentials.userName, + password: btoa(envCredentials.password), + database: envCredentials.database, + userDbVectorIndex: 384, + isReadOnlyUser: envCredentials.isReadonlyUser, + isGDS: envCredentials.isGds, + }) + ); + setConnectionStatus(true); + setGdsActive(envCredentials.isGds); + setIsReadOnlyUser(envCredentials.isReadonlyUser); + handleDisconnectButtonState(false); } } catch (error) { - console.error('Error in DEV session handling:', error); + console.error('Error during backend API call:', error); if (session) { - setUserCredentialsFromSession(session as string); + setUserCredentialsFromSession(session); setConnectionStatus(true); } else { - setErrorMessage(backendApiResponse?.data.error); + setErrorMessage(backendApiResponse?.data?.error); setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } handleDisconnectButtonState(true); From 
7b3b1b4d29b5893b2e9af0f6efacfd2641ed928e Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:08:30 +0530 Subject: [PATCH 286/292] Bug fixing for Icon (#924) * gds name handling * Update Content.tsx * show error message * format --- frontend/src/components/ChatBot/Chatbot.tsx | 12 ++++++------ frontend/src/components/ChatBot/chatInfo.ts | 3 +++ frontend/src/components/Layout/PageLayout.tsx | 4 ++-- .../GraphEnhancementDialog/Deduplication/index.tsx | 4 ++-- .../PostProcessingCheckList/SelectedJobList.tsx | 4 ++-- frontend/src/utils/Utils.ts | 2 +- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index aa84764bc..877dcdab4 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) + msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? 
{ ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg) + : msg ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - (msg.id === chatbotMessageId + msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg) + : msg ) ); } diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index c7e990ae7..26f2e765c 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -1,5 +1,6 @@ import { getNeighbors } from '../../services/GraphQuery'; import { NeoNode, NeoRelationship, UserCredentials } from '../../types'; +import { showNormalToast } from '../../utils/toasts'; export const handleGraphNodeClick = async ( userCredentials: UserCredentials, @@ -29,6 +30,8 @@ export const handleGraphNodeClick = async ( setNeoRels(relationships); setOpenGraphView(true); setViewPoint('chatInfoView'); + } else { + showNormalToast('No nodes or relationships found for the selected node.'); } } catch (error: any) { console.error('Error fetching neighbors:', error); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index ce37c50fc..005d438b5 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -95,7 +95,7 @@ const PageLayout: React.FC = () => { password: atob(parsedConnection.password), database: parsedConnection.database, }); - setGdsActive(parsedConnection.isGDS); + setGdsActive(parsedConnection.isgdsActive); setIsReadOnlyUser(parsedConnection.isReadOnlyUser); } else { console.error('Invalid parsed session data:', parsedConnection); @@ -124,7 +124,7 @@ const PageLayout: React.FC = () => { database: 
envCredentials.database, userDbVectorIndex: 384, isReadOnlyUser: envCredentials.isReadonlyUser, - isGDS: envCredentials.isGds, + isgdsActive: envCredentials.isgdsActive, }) ); return true; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index d59b46e21..df30c25c1 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - (d.e.elementId === nodeid + d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d) + : d ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index 447b1f846..fc5f39206 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - (isGdsActive + isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities')), + : postProcessingTasks.filter((s) => s != 'enable_communities'), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 7314c55e7..82217535d 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -395,7 +395,7 @@ export const isFileCompleted = (waitingFile: CustomFile, item: SourceNode) => waitingFile && item.status === 'Completed'; export const calculateProcessedCount = (prev: number, batchSize: number) => - (prev === batchSize ? batchSize - 1 : prev + 1); + prev === batchSize ? batchSize - 1 : prev + 1; export const isProcessingFileValid = (item: SourceNode, userCredentials: UserCredentials) => { return item.status === 'Processing' && item.fileName != undefined && userCredentials && userCredentials.database; From fc85b45a3654d8e29f5169d84eacc6b5dcb1e23e Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:09:09 +0000 Subject: [PATCH 287/292] diffbot placement --- frontend/src/components/Layout/PageLayout.tsx | 6 +++--- frontend/src/utils/Constants.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 005d438b5..cb29ceb01 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -146,7 +146,7 @@ const PageLayout: React.FC = () => { userName: connectionData.data.user_name, database: connectionData.data.database, isReadonlyUser: !connectionData.data.write_access, - isGds: connectionData.data.gds_status, + isgdsActive: connectionData.data.gds_status, }; if (session) { const updated = updateSessionIfNeeded(envCredentials, session); @@ -167,11 +167,11 @@ const PageLayout: React.FC = () 
=> { database: envCredentials.database, userDbVectorIndex: 384, isReadOnlyUser: envCredentials.isReadonlyUser, - isGDS: envCredentials.isGds, + isgdsActive: envCredentials.isgdsActive, }) ); setConnectionStatus(true); - setGdsActive(envCredentials.isGds); + setGdsActive(envCredentials.isgdsActive); setIsReadOnlyUser(envCredentials.isReadonlyUser); handleDisconnectButtonState(false); } diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 2af5f9942..d577194eb 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -13,12 +13,12 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', 'openai_gpt_3.5', 'openai_gpt_4o', 'openai_gpt_4o_mini', 'gemini_1.5_pro', 'gemini_1.5_flash', + 'diffbot', 'azure_ai_gpt_35', 'azure_ai_gpt_4o', 'ollama_llama3', From fac07e39dcf7baa34cda0a122e64bbafb4fedf85 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Thu, 5 Dec 2024 19:00:32 +0530 Subject: [PATCH 288/292] Update MultiModeMetrics.tsx --- frontend/src/components/ChatBot/MultiModeMetrics.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 343664557..5be629c1c 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -122,7 +122,7 @@ export default function MultiModeMetrics({ - Determines the recall of entities present in both reference and retrieved contexts. + Determines the recall of entities present in both generated answer and retrieved contexts. 
From 0fa4d84562e5909b6daa1013b3f31f73725cae75 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Thu, 5 Dec 2024 14:03:59 +0000 Subject: [PATCH 289/292] libmagic1 library added --- backend/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/Dockerfile b/backend/Dockerfile index 5249ac53c..c36f8ce2e 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,6 +5,7 @@ EXPOSE 8000 # Install dependencies and clean up in one layer RUN apt-get update && \ apt-get install -y --no-install-recommends \ + libmagic1 \ libgl1-mesa-glx \ libreoffice \ cmake \ From a86e07f0edc379b0ae3c85f9950e569e15e83aa7 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:51:58 +0530 Subject: [PATCH 290/292] Document read me update (#926) * removed openai, diffbot, groq key * Removed openai key, diffbot key from backend readme * Added openai key back * Update README.md * Update README.md Backend env change --------- Co-authored-by: kaustubh-darekar --- README.md | 20 +++++++++----------- backend/README.md | 6 +++--- backend/example.env | 3 +-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index e37d3ddfe..6f222b44b 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Upload your files from local machine, GCS or S3 bucket or from web sources, choo - **Knowledge Graph Creation**: Transform unstructured data into structured knowledge graphs using LLMs. - **Providing Schema**: Provide your own custom schema or use existing schema in settings to generate graph. - **View Graph**: View graph for a particular source or multiple sources at a time in Bloom. -- **Chat with Data**: Interact with your data in a Neo4j database through conversational queries, also retrieve metadata about the source of response to your queries. 
+- **Chat with Data**: Interact with your data in a Neo4j database through conversational queries, also retrieve metadata about the source of response to your queries.For a dedicated chat interface, access the standalone chat application at: [Chat-Only](https://dev-frontend-dcavk67s4a-uc.a.run.app/chat-only). This link provides a focused chat experience for querying your data. ## Getting started @@ -37,12 +37,6 @@ EX: VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" ``` -In your root folder, create a .env file with your OPENAI and DIFFBOT keys (if you want to use both): -```env -OPENAI_API_KEY="your-openai-key" -DIFFBOT_API_KEY="your-diffbot-key" -``` - if you only want OpenAI: ```env VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" @@ -102,9 +96,13 @@ Alternatively, you can run the backend and frontend separately: ``` - For the backend: -1. Create the backend/.env file by copy/pasting the backend/example.env. -2. Change values as needed -3. +1. Create the backend/.env file by copy/pasting the backend/example.env. To streamline the initial setup and testing of the application, you can preconfigure user credentials directly within the .env file. This bypasses the login dialog and allows you to immediately connect with a predefined user. + - **NEO4J_URI**: + - **NEO4J_USERNAME**: + - **NEO4J_PASSWORD**: + - **NEO4J_DATABASE**: +3. Change values as needed +4. ```bash cd backend python -m venv envName @@ -202,7 +200,7 @@ VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} ## Usage -1. Connect to Neo4j Aura Instance which can be both AURA DS or AURA DB by passing URI and password or using Neo4j credentials file. +1. Connect to Neo4j Aura Instance which can be both AURA DS or AURA DB by passing URI and password through Backend env, fill using login dialog or drag and drop the Neo4j credentials file. 2. To differntiate we have added different icons. 
For AURA DB we have a database icon and for AURA DS we have scientific molecule icon right under Neo4j Connection details label. 3. Choose your source from a list of Unstructured sources to create graph. 4. Change the LLM (if required) from drop down, which will be used to generate graph. diff --git a/backend/README.md b/backend/README.md index 5783327d7..1ab091216 100644 --- a/backend/README.md +++ b/backend/README.md @@ -50,11 +50,11 @@ http://127.0.0.1:8000/redocs for ReDoc. ## Configuration -Update the environment variable in `.env` file. +Update the environment variable in `.env` file. Refer example.env in backend folder for more config. -`OPENAI_API_KEY`: Open AI key to use LLM +`OPENAI_API_KEY`: Open AI key to use incase of openai embeddings -`DIFFBOT_API_KEY` : Diffbot API key to use DiffbotGraphTransformer +`EMBEDDING_MODEL` : "all-MiniLM-L6-v2" or "openai" or "vertexai" `NEO4J_URI` : Neo4j URL diff --git a/backend/example.env b/backend/example.env index 7cf9b13ac..85b8d1054 100644 --- a/backend/example.env +++ b/backend/example.env @@ -1,6 +1,5 @@ OPENAI_API_KEY = "" -DIFFBOT_API_KEY = "" -GROQ_API_KEY = "" +#EMBEDDING_MODEL can be openai or vertexai or by default all-MiniLM-L6-v2 EMBEDDING_MODEL = "all-MiniLM-L6-v2" RAGAS_EMBEDDING_MODEL = "openai" IS_EMBEDDING = "true" From 98cfc3c1353feec43459b4d6749bf41973d99d82 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:50:12 +0000 Subject: [PATCH 291/292] metric table and default model fixes --- .../src/components/ChatBot/ChatInfoModal.tsx | 8 +- frontend/src/components/ChatBot/Chatbot.tsx | 14 +- .../components/ChatBot/MultiModeMetrics.tsx | 153 +++++++++++++++--- frontend/src/components/FileTable.tsx | 1 + .../src/components/Layout/DrawerDropzone.tsx | 10 +- frontend/src/components/Layout/PageLayout.tsx | 2 +- .../Deduplication/index.tsx | 4 +- .../SelectedJobList.tsx | 4 +- .../src/components/UI/ButtonWithToolTip.tsx | 2 +- 
.../src/components/UI/IconButtonToolTip.tsx | 2 +- frontend/src/context/UsersFiles.tsx | 6 +- frontend/src/utils/Constants.ts | 2 +- frontend/src/utils/Utils.ts | 2 +- 13 files changed, 164 insertions(+), 46 deletions(-) diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 68c07227d..ab66a52f1 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -96,16 +96,16 @@ const ChatInfoModal: React.FC = ({ const [enableReference, toggleReferenceVisibility] = useReducer((state: boolean) => !state, false); const textAreaRef = useRef(null); const [isAdditionalMetricsEnabled, setIsAdditionalMetricsEnabled] = useState( - multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 3 + multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 4 ? true - : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 3 + : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 4 ? false : null ); const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState( - metricDetails != undefined && Object.keys(metricDetails).length > 2 + metricDetails != undefined && Object.keys(metricDetails).length > 3 ? true - : metricDetails != undefined && Object.keys(metricDetails).length <= 2 + : metricDetails != undefined && Object.keys(metricDetails).length <= 3 ? false : null ); diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 877dcdab4..a3fe174c2 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -249,7 +249,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? 
{ ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -264,7 +264,7 @@ const Chatbot: FC = (props) => { } else { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg + (msg.id === chatbotMessageId ? { ...msg, modes: { ...msg.modes, [mode]: responseMode } } : msg) ) ); } @@ -273,7 +273,7 @@ const Chatbot: FC = (props) => { console.error(`API call failed for mode ${mode}:`, result.reason); setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, modes: { @@ -281,7 +281,7 @@ const Chatbot: FC = (props) => { [mode]: { message: 'Failed to fetch response for this mode.', error: result.reason }, }, } - : msg + : msg) ) ); } @@ -294,7 +294,7 @@ const Chatbot: FC = (props) => { if (error instanceof Error) { setListMessages((prev) => prev.map((msg) => - msg.id === chatbotMessageId + (msg.id === chatbotMessageId ? { ...msg, isLoading: false, @@ -306,7 +306,7 @@ const Chatbot: FC = (props) => { }, }, } - : msg + : msg) ) ); } @@ -578,7 +578,7 @@ const Chatbot: FC = (props) => { }} onClose={() => setShowInfoModal(false)} isOpen={showInfoModal} - size={activeChat?.currentMode === chatModeLables['entity search+vector'] ? 'large' : 'medium'} + size={'large'} >
    Mode, footer: (info) => info.column.id, + maxSize: 150, }), columnHelper.accessor((row) => row.answer_relevancy as number, { id: 'Answer Relevancy', @@ -74,6 +75,7 @@ export default function MultiModeMetrics({ ), + maxSize: 150, }), columnHelper.accessor((row) => row.faithfulness as number, { id: 'Faithfullness', @@ -101,6 +103,7 @@ export default function MultiModeMetrics({ ), + maxSize: 150, }), columnHelper.accessor((row) => row.context_entity_recall as number, { id: 'Entity Recall Score', @@ -128,6 +131,7 @@ export default function MultiModeMetrics({ ), + maxSize: 150, }), columnHelper.accessor((row) => row.semantic_score as number, { id: 'Semantic Score', @@ -155,6 +159,7 @@ export default function MultiModeMetrics({ ), + maxSize: 150, }), columnHelper.accessor((row) => row.rouge_score as number, { id: 'Rouge Score', @@ -182,30 +187,134 @@ export default function MultiModeMetrics({ ), + maxSize: 150, }), ], [] ); - const table = useReactTable({ - data, - columns, - getCoreRowModel: getCoreRowModel(), - getFilteredRowModel: getFilteredRowModel(), - getPaginationRowModel: getPaginationRowModel(), - enableGlobalFilter: false, - autoResetPageIndex: false, - enableColumnResizing: true, - enableRowSelection: true, - enableMultiRowSelection: true, - enableSorting: false, - }); - useEffect(() => { - if (isWithAdditionalMetrics === false) { - table.setColumnVisibility({ 'Semantic Score': false, 'Rouge Score': false }); - } else { - table.resetColumnVisibility(true); - } - }, [isWithAdditionalMetrics, table]); + const columnswithoutSemanticAndRougeScores = useMemo( + () => [ + columnHelper.accessor((row) => row.mode, { + id: 'Mode', + cell: (info) => { + const metric = info.getValue(); + const capitilizedMetric = metric.includes('_') + ? metric + .split('_') + .map((w) => capitalize(w)) + .join(' ') + : capitalize(metric); + return ( +
    + {capitilizedMetric} +
    + ); + }, + header: () => Mode, + footer: (info) => info.column.id, + maxSize: 150, + }), + columnHelper.accessor((row) => row.answer_relevancy as number, { + id: 'Answer Relevancy', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Relevancy + + + + + + + + + Determines How well the answer addresses the user's question. + + + + + ), + }), + columnHelper.accessor((row) => row.faithfulness as number, { + id: 'Faithfullness', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Faithful + + + + + + + + + Determines How accurately the answer reflects the provided information. + + + + + ), + }), + columnHelper.accessor((row) => row.context_entity_recall as number, { + id: 'Entity Recall Score', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Context + + + + + + + + + Determines the recall of entities present in both generated answer and retrieved contexts. + + + + + ), + }), + ], + [] + ); + const config = useMemo( + () => ({ + data, + columns: !isWithAdditionalMetrics ? 
columnswithoutSemanticAndRougeScores : columns, + getCoreRowModel: getCoreRowModel(), + getFilteredRowModel: getFilteredRowModel(), + getPaginationRowModel: getPaginationRowModel(), + enableGlobalFilter: false, + autoResetPageIndex: false, + enableColumnResizing: true, + enableRowSelection: true, + enableMultiRowSelection: true, + enableSorting: false, + }), + [isWithAdditionalMetrics] + ); + const table = useReactTable(config); return ( @@ -226,7 +335,7 @@ export default function MultiModeMetrics({ }} isAutoResizingColumns={true} isLoading={metricsLoading} - rootProps={{ className: isWithAdditionalMetrics === false ? '!w-[465px]' : 'auto' }} + // rootProps={{ className: isWithAdditionalMetrics === false ? '!w-[465px]' : 'auto' }} components={{ Body: () => ( ((props, ref) => { return (
    = ({ return (
    - + {!isReadOnlyUser ? ( - + {alertState.showAlert && ( { showDisconnectButton, } = useCredentials(); const { cancel } = useSpeechSynthesis(); - + useEffect(() => { async function initializeConnection() { const session = localStorage.getItem('neo4j.connection'); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index df30c25c1..d59b46e21 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -104,12 +104,12 @@ export default function DeduplicationTab() { const onRemove = (nodeid: string, similarNodeId: string) => { setDuplicateNodes((prev) => { return prev.map((d) => - d.e.elementId === nodeid + (d.e.elementId === nodeid ? { ...d, similar: d.similar.filter((n) => n.elementId != similarNodeId), } - : d + : d) ); }); }; diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx index fc5f39206..447b1f846 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx @@ -11,11 +11,11 @@ export default function SelectedJobList({ }) { const ongoingPostProcessingTasks = useMemo( () => - isGdsActive + (isGdsActive ? postProcessingTasks.includes('enable_communities') ? 
postProcessingTasks : postProcessingTasks.filter((s) => s != 'enable_communities') - : postProcessingTasks.filter((s) => s != 'enable_communities'), + : postProcessingTasks.filter((s) => s != 'enable_communities')), [isGdsActive, postProcessingTasks] ); return ( diff --git a/frontend/src/components/UI/ButtonWithToolTip.tsx b/frontend/src/components/UI/ButtonWithToolTip.tsx index 310566edf..acede505b 100644 --- a/frontend/src/components/UI/ButtonWithToolTip.tsx +++ b/frontend/src/components/UI/ButtonWithToolTip.tsx @@ -33,7 +33,7 @@ const ButtonWithToolTip = ({ const [isHovered, setIsHovered] = useState(false); return ( - +