Skip to content

Commit

Permalink
don't use chroma.upsert; it duplicates embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
granawkins committed May 17, 2024
1 parent 0d475d7 commit 0e42260
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion ragdaemon/annotators/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,6 @@ async def annotate(
add_to_db["metadatas"].append(data)
if len(add_to_db["ids"]) > 0:
add_to_db = remove_add_to_db_duplicates(**add_to_db)
db.upsert(**add_to_db)
db.add(**add_to_db)

return graph
8 changes: 4 additions & 4 deletions ragdaemon/annotators/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ async def annotate(

# Sync with remote DB
ids = list(set(checksums.values()))
response = db.get(ids=ids, include=["metadatas"])
db_data = {id: data for id, data in zip(response["ids"], response["metadatas"])}
response = db.get(ids=ids, include=[])
db_data = set(response["ids"])
add_to_db = {"ids": [], "documents": [], "metadatas": []}
for id, checksum in checksums.items():
if not refresh and checksum in db_data:
if checksum in db_data:
continue
data = deepcopy(graph.nodes[id])
document = data.pop("document")
Expand All @@ -168,6 +168,6 @@ async def annotate(
add_to_db["metadatas"].append(data)
if len(add_to_db["ids"]) > 0:
add_to_db = remove_add_to_db_duplicates(**add_to_db)
db.upsert(**add_to_db)
db.add(**add_to_db)

return graph
4 changes: 2 additions & 2 deletions ragdaemon/annotators/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ async def annotate(
db_data = {id: data for id, data in zip(response["ids"], response["metadatas"])}
add_to_db = {"ids": [], "documents": [], "metadatas": []}
for path, checksum in checksums.items():
if not refresh and checksum in db_data:
if checksum in db_data:
data = db_data[checksum]
graph.nodes[path.as_posix()].update(data)
else:
Expand All @@ -118,7 +118,7 @@ async def annotate(
add_to_db["metadatas"].append(data)
if len(add_to_db["ids"]) > 0:
add_to_db = remove_add_to_db_duplicates(**add_to_db)
db.upsert(**add_to_db)
db.add(**add_to_db)

graph.graph["files_checksum"] = files_checksum(cwd, self.ignore_patterns)
return graph
2 changes: 1 addition & 1 deletion ragdaemon/database/lite_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def query(self, query: str, active_checksums: list[str]) -> dict[str, list[Any]]
"distances": [[1] * len(records)],
}

def upsert(
def add(
self,
ids: list[str] | str,
metadatas: list[dict] | dict,
Expand Down

0 comments on commit 0e42260

Please sign in to comment.