diff --git a/pyproject.toml b/pyproject.toml index 132febd..295ae2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages=["ragdaemon"] [project] name = "ragdaemon" -version = "0.4.7" +version = "0.5.0" description = "Generate and render a call graph for a Python project." readme = "README.md" dependencies = [ diff --git a/ragdaemon/__init__.py b/ragdaemon/__init__.py index a34b2f6..3d18726 100644 --- a/ragdaemon/__init__.py +++ b/ragdaemon/__init__.py @@ -1 +1 @@ -__version__ = "0.4.7" +__version__ = "0.5.0" diff --git a/ragdaemon/annotators/call_graph.py b/ragdaemon/annotators/call_graph.py index c2e4497..5de1b86 100644 --- a/ragdaemon/annotators/call_graph.py +++ b/ragdaemon/annotators/call_graph.py @@ -9,7 +9,7 @@ from spice.models import TextModel from ragdaemon.annotators.base_annotator import Annotator -from ragdaemon.database import Database +from ragdaemon.database import Database, remove_update_db_duplicates from ragdaemon.graph import KnowledgeGraph from ragdaemon.errors import RagdaemonError from ragdaemon.utils import ( @@ -155,13 +155,11 @@ async def get_file_call_data( node: str, data: dict, graph: KnowledgeGraph, - db: Database, retries: int = 1, ): - """Generate and save call data for a file node to graph and db""" + """Generate and save call data for a file node to graph""" calls = {} - record = db.get(data["checksum"]) - document = record["documents"][0] + document = data["document"] # Insert line numbers lines = document.split("\n") @@ -184,10 +182,6 @@ async def get_file_call_data( else "Skipping." ) - # Save to db and graph - metadatas = record["metadatas"][0] - metadatas[self.call_field_id] = json.dumps(calls) - db.update(data["checksum"], metadatas=metadatas) data[self.call_field_id] = calls async def annotate( @@ -212,17 +206,27 @@ async def annotate( files_with_calls.append((node, data)) # Generate/add call data for nodes that don't have it tasks = [] + files_just_updated = set() for node, data in files_with_calls: if refresh or data.get(self.call_field_id, None) is None: checksum = data.get("checksum") if checksum is None: raise RagdaemonError(f"Node {node} has no checksum.") - tasks.append(self.get_file_call_data(node, data, graph, db)) + tasks.append(self.get_file_call_data(node, data, graph)) + files_just_updated.add(node) if len(tasks) > 0: if self.verbose: await tqdm.gather(*tasks, desc="Generating call graph") else: await asyncio.gather(*tasks) + update_db = {"ids": [], "metadatas": []} + for node in files_just_updated: + data = graph.nodes[node] + update_db["ids"].append(data["checksum"]) + metadatas = {self.call_field_id: json.dumps(data[self.call_field_id])} + update_db["metadatas"].append(metadatas) + update_db = remove_update_db_duplicates(**update_db) + db.update(**update_db) # Add call edges to graph. Each call should have only ONE source; if there are # chunks, the source is the matching chunk, otherwise it's the file. 
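Review note on the batched-write pattern introduced above: instead of one `db.update` per file inside `get_file_call_data`, `annotate` now collects checksum/metadata pairs from the graph and issues a single update after the async tasks finish. A minimal, self-contained sketch of that pattern, assuming hypothetical sample data and a hypothetical `call_field_id` value; the dedup helper is copied from the `chroma_database.py` hunk further down in this diff:

```python
import json

def remove_update_db_duplicates(ids: list[str], metadatas: list[dict]) -> dict:
    # Keep the first metadata seen for each id (mirrors the helper added in
    # ragdaemon/database/chroma_database.py later in this diff).
    seen, output = set(), {"ids": [], "metadatas": []}
    for id, metadata in zip(ids, metadatas):
        if id not in seen:
            output["ids"].append(id)
            output["metadatas"].append(metadata)
            seen.add(id)
    return output

# Hypothetical per-file call data gathered by the async tasks above.
call_field_id = "calls"
files_just_updated = {
    "a.py": {"checksum": "abc123", call_field_id: {"foo": [3]}},
    "b.py": {"checksum": "abc123", call_field_id: {}},  # same checksum -> dropped
}
update_db = {"ids": [], "metadatas": []}
for node, data in files_just_updated.items():
    update_db["ids"].append(data["checksum"])
    update_db["metadatas"].append({call_field_id: json.dumps(data[call_field_id])})
update_db = remove_update_db_duplicates(**update_db)
print(update_db["ids"])  # ['abc123'] -> one batched db.update(**update_db) call
```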
@@ -244,8 +248,7 @@ async def annotate( checksum = data.get("checksum") if checksum is None: raise RagdaemonError(f"File node {file} is missing checksum field.") - record = db.get(checksum) - document = record["documents"][0] + document = data["document"] for i in range(1, len(document.split("\n")) + 1): line_index[i] = file else: diff --git a/ragdaemon/annotators/chunker.py b/ragdaemon/annotators/chunker.py index 0e8423b..0d4e4c0 100644 --- a/ragdaemon/annotators/chunker.py +++ b/ragdaemon/annotators/chunker.py @@ -17,13 +17,18 @@ import asyncio import json +from copy import deepcopy from pathlib import Path -from typing import Any, Coroutine, Optional +from typing import Any, Optional from tqdm.asyncio import tqdm from ragdaemon.annotators.base_annotator import Annotator -from ragdaemon.database import Database, remove_add_to_db_duplicates +from ragdaemon.database import ( + Database, + remove_add_to_db_duplicates, + remove_update_db_duplicates, +) from ragdaemon.errors import RagdaemonError from ragdaemon.graph import KnowledgeGraph from ragdaemon.utils import DEFAULT_CODE_EXTENSIONS, get_document, hash_str, truncate @@ -64,92 +69,18 @@ async def chunk_document(self, document: str) -> list[dict[str, Any]]: """Return a list of {id, ref} chunks for the given document.""" raise NotImplementedError() - async def get_file_chunk_data(self, node, data, db): + async def get_file_chunk_data(self, node, data): """Generate and save chunk data for a file node to graph and db""" - record = db.get(data["checksum"]) - document = record["documents"][0] + document = data["document"] try: chunks = await self.chunk_document(document) except RagdaemonError: if self.verbose: print(f"Error chunking {node}; skipping.") chunks = [] - # Save to db and graph - metadatas = record["metadatas"][0] - metadatas[self.chunk_field_id] = json.dumps(chunks) - db.update(data["checksum"], metadatas=metadatas) + chunks = sorted(chunks, key=lambda x: len(x["id"])) data[self.chunk_field_id] = chunks - def add_file_chunks_to_graph( - self, - file: str, - data: dict, - graph: KnowledgeGraph, - db: Database, - refresh: bool = False, - ) -> dict[str, list[Any]]: - """Load chunks from file data into db/graph""" - - # Grab and validate chunks for given file - chunks = data.get(self.chunk_field_id) - if chunks is None: - raise RagdaemonError(f"Node {file} missing {self.chunk_field_id}") - if isinstance(chunks, str): - chunks = json.loads(chunks) - data[self.chunk_field_id] = chunks - - add_to_db = {"ids": [], "documents": [], "metadatas": []} - if len(chunks) == 0: - return add_to_db - base_id = f"{file}:BASE" - if not any(chunk["id"] == base_id for chunk in chunks): - raise RagdaemonError(f"Node {file} missing base chunk") - edges_to_add = {(file, base_id)} - for chunk in chunks: - # Locate or create record for chunk - id, ref = chunk["id"], chunk["ref"] - document = get_document(ref, Path(graph.graph["cwd"])) - checksum = hash_str(document) - records = db.get(checksum)["metadatas"] - if not refresh and len(records) > 0: - record = records[0] - else: - record = { - "id": id, - "type": "chunk", - "ref": chunk["ref"], - "checksum": checksum, - "active": False, - } - document, truncate_ratio = truncate(document, db.embedding_model) - if truncate_ratio > 0 and self.verbose: - print(f"Truncated {id} by {truncate_ratio:.2%}") - add_to_db["ids"].append(checksum) - add_to_db["documents"].append(document) - add_to_db["metadatas"].append(record) - - # Add chunk to graph and connect hierarchy edges - graph.add_node(record["id"], **record) - - 
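Review note: the `sorted(chunks, key=lambda x: len(x["id"]))` line added to `get_file_chunk_data` above is what lets `annotate` later assume parent chunks appear before their children, since a parent id is a strict prefix of (and therefore shorter than) its children's ids. A tiny illustration with made-up chunk ids:

```python
# Hypothetical chunks as returned by chunk_document(); "module.py:Foo" is a
# prefix of "module.py:Foo.bar", so sorting by id length orders parents first.
chunks = [
    {"id": "module.py:Foo.bar", "ref": "module.py:10-20"},
    {"id": "module.py:Foo", "ref": "module.py:4-25"},
    {"id": "module.py:Foo.baz", "ref": "module.py:21-25"},
]
chunks = sorted(chunks, key=lambda x: len(x["id"]))
print([c["id"] for c in chunks])
# ['module.py:Foo', 'module.py:Foo.bar', 'module.py:Foo.baz']
```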
def _link_to_base_chunk(_id): - """Recursively create links from _id to base chunk.""" - path_str, chunk_str = _id.split(":") - chunk_list = chunk_str.split(".") - _parent = ( - f"{path_str}:{'.'.join(chunk_list[:-1])}" - if len(chunk_list) > 1 - else base_id - ) - edges_to_add.add((_parent, _id)) - if _parent != base_id: - _link_to_base_chunk(_parent) - - if id != base_id: - _link_to_base_chunk(id) - for source, target in edges_to_add: - graph.add_edge(source, target, type="hierarchy") - return add_to_db - async def annotate( self, graph: KnowledgeGraph, db: Database, refresh: bool = False ) -> KnowledgeGraph: @@ -174,37 +105,98 @@ async def annotate( files_just_chunked = set() for node, data in files_with_chunks: if refresh or data.get(self.chunk_field_id, None) is None: - tasks.append(self.get_file_chunk_data(node, data, db)) + tasks.append(self.get_file_chunk_data(node, data)) files_just_chunked.add(node) + elif isinstance(data[self.chunk_field_id], str): + data[self.chunk_field_id] = json.loads(data[self.chunk_field_id]) if len(tasks) > 0: if self.verbose: await tqdm.gather(*tasks, desc="Chunking files...") else: await asyncio.gather(*tasks) + update_db = {"ids": [], "metadatas": []} + for node in files_just_chunked: + data = graph.nodes[node] + update_db["ids"].append(data["checksum"]) + metadatas = {self.chunk_field_id: json.dumps(data[self.chunk_field_id])} + update_db["metadatas"].append(metadatas) + update_db = remove_update_db_duplicates(**update_db) + db.update(**update_db) # Process chunks - add_to_db = {"ids": [], "documents": [], "metadatas": []} - remove_from_db = set() + # 1. Add all chunks to graph + all_chunk_ids = set() for file, data in files_with_chunks: - try: - refresh = refresh or file in files_just_chunked - _add_to_db = self.add_file_chunks_to_graph( - file, data, graph, db, refresh - ) - for field, values in _add_to_db.items(): - add_to_db[field].extend(values) - except RagdaemonError as e: - # If there's a problem with the chunks, remove the file from the db. - # This, along with 'files_just_chunked', prevents invalid database - # records perpetuating. - if self.verbose: - print(f"Error adding chunks for {file}:\n{e}. Removing db record.") - remove_from_db.add(data["checksum"]) + if len(data[self.chunk_field_id]) == 0: continue - if len(remove_from_db) > 0: - db.delete(list(remove_from_db)) - raise RagdaemonError(f"Chunking error, try again.") + # Sort such that "parents" are added before "children" + base_id = f"{file}:BASE" + chunks = [c for c in data[self.chunk_field_id] if c["id"] != base_id] + chunks.sort(key=lambda x: len(x["id"])) + base_chunk = [c for c in data[self.chunk_field_id] if c["id"] == base_id] + if len(base_chunk) != 1: + raise RagdaemonError(f"Node {file} missing base chunk") + chunks = base_chunk + chunks + # Load chunks into graph + for chunk in chunks: + id, ref = chunk["id"], chunk["ref"] + document = get_document(ref, Path(graph.graph["cwd"])) + chunk_data = { + "id": id, + "ref": ref, + "type": "chunk", + "document": document, + "checksum": hash_str(document), + "active": False, + } + graph.add_node(id, **chunk_data) + all_chunk_ids.add(id) + # Locate the parent and add hierarchy edge + chunk_str = id.split(":")[1] + if chunk_str == "BASE": + parent = file + elif "." not in chunk_str: + parent = base_id + else: + parts = chunk_str.split(".") + while True: + parent = f"{file}:{'.'.join(parts[:-1])}" + if parent in graph: + break + parent_str = parent.split(":")[1] + if "." 
not in parent_str: + # If we can't find a parent, use the base node. + if self.verbose: + print(f"No parent node found for {id}") + parent = base_id + break + # If intermediate parents are missing, skip them + parts = parent_str.split(".") + graph.add_edge(parent, id, type="hierarchy") + + # 2. Get metadata for all chunks from db + all_chunk_checksums = [ + graph.nodes[chunk]["checksum"] for chunk in all_chunk_ids + ] + response = db.get(ids=all_chunk_checksums, include=["metadatas"]) + db_data = {data["id"]: data for data in response["metadatas"]} + add_to_db = {"ids": [], "documents": [], "metadatas": []} + for chunk in all_chunk_ids: + if chunk in db_data: + # 3. Add db metadata for nodes that have it + graph.nodes[chunk].update(db_data[chunk]) + else: + # 4. Add to db nodes that don't + data = deepcopy(graph.nodes[chunk]) + document = data.pop("document") + document, truncate_ratio = truncate(document, db.embedding_model) + if truncate_ratio > 0 and self.verbose: + print(f"Truncated {chunk} by {truncate_ratio:.2%}") + add_to_db["ids"].append(data["checksum"]) + add_to_db["documents"].append(document) + add_to_db["metadatas"].append(data) if len(add_to_db["ids"]) > 0: add_to_db = remove_add_to_db_duplicates(**add_to_db) - db.upsert(**add_to_db) + db.add(**add_to_db) + return graph diff --git a/ragdaemon/annotators/chunker_llm.py b/ragdaemon/annotators/chunker_llm.py index 2132773..77b4ee3 100644 --- a/ragdaemon/annotators/chunker_llm.py +++ b/ragdaemon/annotators/chunker_llm.py @@ -37,10 +37,12 @@ class ChunkerLLM(Chunker): def __init__( self, *args, + batch_size: int = 800, model: Optional[TextModel | str] = DEFAULT_COMPLETION_MODEL, **kwargs, ): super().__init__(*args, **kwargs) + self.batch_size = batch_size self.model = model async def get_llm_response( @@ -88,7 +90,7 @@ async def get_llm_response( return chunks async def chunk_document( - self, document: str, batch_size: int = 1000, retries: int = 1 + self, document: str, retries: int = 1 ) -> list[dict[str, Any]]: """Parse file_lines into a list of {id, ref} chunks.""" lines = document.split("\n") @@ -100,9 +102,9 @@ async def chunk_document( # Get raw llm output: {id, start_line, end_line} chunks = list[dict[str, Any]]() - n_batches = (len(file_lines) + batch_size - 1) // batch_size + n_batches = (len(file_lines) + self.batch_size - 1) // self.batch_size for i in range(n_batches): - batch_lines = file_lines[i * batch_size : (i + 1) * batch_size] + batch_lines = file_lines[i * self.batch_size : (i + 1) * self.batch_size] last_chunk = chunks.pop() if chunks else None for j in range(retries + 1, 0, -1): try: diff --git a/ragdaemon/annotators/diff.py b/ragdaemon/annotators/diff.py index bf4e0ce..8b6ac88 100644 --- a/ragdaemon/annotators/diff.py +++ b/ragdaemon/annotators/diff.py @@ -1,5 +1,6 @@ import json import re +from copy import deepcopy from pathlib import Path from ragdaemon.annotators.base_annotator import Annotator @@ -94,86 +95,79 @@ async def annotate( if data and data.get("type") == "diff" } graph.remove_nodes_from(graph_nodes) + + checksums = dict[str, str]() document = get_document(self.diff_args, cwd, type="diff") checksum = hash_str(document) - existing_records = db.get(checksum) - if refresh or len(existing_records["ids"]) == 0: - chunks = get_chunks_from_diff(id=self.id, diff=document) - data = { - "id": self.id, - "ref": self.diff_args, - "type": "diff", - "checksum": checksum, - "chunks": json.dumps(chunks), - "active": False, - } - - # If the full diff is too long to embed, it is truncated. 
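Review note on the parent-resolution loop above (the `while True` block in `Chunker.annotate`): chunk ids encode hierarchy with dots, so the parent of `file.py:A.b` is `file.py:A`, missing intermediate levels are skipped, and the file's BASE chunk is the fallback. A standalone sketch of the same idea with hypothetical ids; `resolve_parent` is an illustrative helper, not part of the codebase:

```python
def resolve_parent(chunk_id: str, existing: set[str], base_id: str) -> str:
    """Walk up the dotted chunk id until an existing ancestor is found,
    falling back to the file's BASE chunk."""
    path_str, chunk_str = chunk_id.split(":")
    if chunk_str == "BASE":
        return path_str                      # base chunk hangs off the file node
    parts = chunk_str.split(".")
    while len(parts) > 1:
        parent = f"{path_str}:{'.'.join(parts[:-1])}"
        if parent in existing:
            return parent                    # nearest existing ancestor
        parts = parts[:-1]                   # skip missing intermediate levels
    return base_id

existing = {"src/app.py:BASE", "src/app.py:Server"}
print(resolve_parent("src/app.py:Server.run.inner", existing, "src/app.py:BASE"))
# 'src/app.py:Server' -- the missing 'Server.run' level is skipped
```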
Anything - # removed will be captured in chunks. - document, truncate_ratio = truncate(document, db.embedding_model) - if truncate_ratio > 0 and self.verbose: - print(f"Truncated diff by {truncate_ratio:.2%}") - db.upsert(ids=checksum, documents=document, metadatas=data) - else: - data = existing_records["metadatas"][0] - data["chunks"] = json.loads(data["chunks"]) + chunks = get_chunks_from_diff(id=self.id, diff=document) + data = { + "id": self.id, + "ref": self.diff_args, + "type": "diff", + "document": document, + "checksum": checksum, + "chunks": chunks, + "active": False, + } graph.add_node(self.id, **data) + checksums[self.id] = checksum - # Add chunks - add_to_db = {"ids": [], "documents": [], "metadatas": []} - edges_to_add = set() - for chunk_id, chunk_ref in data["chunks"].items(): + for chunk_id, chunk_ref in chunks.items(): document = get_document(chunk_ref, cwd, type="diff") chunk_checksum = hash_str(document) - existing_records = db.get(chunk_checksum) - if refresh or len(existing_records["ids"]) == 0: - data = { - "id": chunk_id, - "ref": chunk_ref, - "type": "diff", - "checksum": chunk_checksum, - "active": False, - } - document, truncate_ratio = truncate(document, db.embedding_model) - if truncate_ratio > 0 and self.verbose: - print(f"Truncated diff chunk {chunk_id} by {truncate_ratio:.2%}") - add_to_db["ids"].append(chunk_checksum) - add_to_db["documents"].append(document) - add_to_db["metadatas"].append(data) - else: - data = existing_records["metadatas"][0] + data = { + "id": chunk_id, + "ref": chunk_ref, + "type": "diff", + "document": document, + "checksum": chunk_checksum, + "active": False, + } graph.add_node(chunk_id, **data) - edges_to_add.add((self.id, chunk_id)) - # Match file/chunk nodes in graph - path_ref = chunk_id.split(":", 1)[1] - file, lines = parse_path_ref(path_ref) - file_str = str(file) - if file_str not in graph: # Removed files + graph.add_edge(self.id, chunk_id, type="diff") + checksums[chunk_id] = chunk_checksum + + # Link it to all overlapping chunks (if file has chunks) or to the file + _, path, lines = parse_diff_id(chunk_id) + if not path: + continue + path_str = path.as_posix() + if path_str not in graph: # Removed files if self.verbose: - print(f"File {file_str} not in graph") + print(f"File {path_str} not in graph") + continue + link_to = set() + for node, data in graph.nodes(data=True): + if not node.startswith(f"{path_str}:") or data.get("type") != "chunk": + continue + _, _lines = parse_path_ref(data["ref"]) + if lines and _lines and lines.intersection(_lines): + link_to.add(node) + if len(link_to) == 0: + link_to.add(path_str) + for node in link_to: + graph.add_edge(node, chunk_id, type="link") + + # Sync with remote DB + ids = list(set(checksums.values())) + response = db.get(ids=ids, include=[]) + db_data = set(response["ids"]) + add_to_db = {"ids": [], "documents": [], "metadatas": []} + for id, checksum in checksums.items(): + if checksum in db_data: continue - edges_to_add.add((chunk_id, file_str)) - - def _link_to_successors(_node, visited=set()): - for successor in graph.successors(_node): - if successor in visited: - continue - visited.add(successor) - edge = (chunk_id, successor) - _data = graph.nodes[successor] - if _data.get("type") not in ["file", "chunk"]: - continue - _, _lines = parse_path_ref(_data["ref"]) - if lines and _lines and lines.intersection(_lines): - edges_to_add.add(edge) - _link_to_successors(successor, visited) - - _link_to_successors(file_str) - - for source, target in edges_to_add: - 
graph.add_edge(source, target, type="diff") + data = deepcopy(graph.nodes[id]) + document = data.pop("document") + if "chunks" in data: + data["chunks"] = json.dumps(data["chunks"]) + document, truncate_ratio = truncate(document, db.embedding_model) + if self.verbose and truncate_ratio > 0: + print(f"Truncated {id} by {truncate_ratio:.2%}") + add_to_db["ids"].append(checksum) + add_to_db["documents"].append(document) + add_to_db["metadatas"].append(data) if len(add_to_db["ids"]) > 0: add_to_db = remove_add_to_db_duplicates(**add_to_db) - db.upsert(**add_to_db) + db.add(**add_to_db) return graph diff --git a/ragdaemon/annotators/hierarchy.py b/ragdaemon/annotators/hierarchy.py index 397a151..b22919b 100644 --- a/ragdaemon/annotators/hierarchy.py +++ b/ragdaemon/annotators/hierarchy.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path from ragdaemon.annotators.base_annotator import Annotator @@ -8,94 +9,6 @@ from ragdaemon.utils import get_document, hash_str, truncate -def get_active_checksums( - cwd: Path, - db: Database, - refresh: bool = False, - verbose: bool = False, - ignore_patterns: set[Path] = set(), -) -> dict[Path, str]: - # Get checksums for all active files - checksums: dict[Path, str] = {} - paths = get_paths_for_directory(cwd, exclude_patterns=ignore_patterns) - add_to_db = { - "ids": [], - "documents": [], - "metadatas": [], - } - for path in paths: - try: - path_str = path.as_posix() - ref = path_str - document = get_document(ref, cwd) - checksum = hash_str(document) - existing_record = len(db.get(checksum)["ids"]) > 0 - if refresh or not existing_record: - # add new items to db (will generate embeddings) - metadatas = { - "id": path_str, - "type": "file", - "ref": ref, - "checksum": checksum, - "active": False, - } - document, truncate_ratio = truncate(document, db.embedding_model) - if truncate_ratio > 0 and verbose: - print(f"Truncated {path_str} by {truncate_ratio:.2%}") - add_to_db["ids"].append(checksum) - add_to_db["documents"].append(document) - add_to_db["metadatas"].append(metadatas) - checksums[path] = checksum - except UnicodeDecodeError: # Ignore non-text files - pass - except RagdaemonError as e: - if verbose: - print(f"Error processing path {path}: {e}") - - # Get checksums for all active directories - directories = set() - for path in paths: - for parent in path.parents: - if parent not in paths: - directories.add(parent) - for path in directories: - ref = path.as_posix() - document = get_document(ref, cwd, type="directory") - - # The checksum for a directory is the hash of the checksums of its subpaths, - # which are listed in the document and were computed above. 
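Review note: both the removed `get_active_checksums` comment above and the rewritten `Hierarchy.annotate` below compute a directory checksum as the hash of the concatenation of its children's checksums, taken in the order the directory document lists them. A small sketch of that computation; the paths are hypothetical and `hash_str` here is an md5 stand-in for `ragdaemon.utils.hash_str`:

```python
import hashlib

def hash_str(s: str) -> str:
    # Stand-in for ragdaemon.utils.hash_str (the real hash function may differ).
    return hashlib.md5(s.encode()).hexdigest()

# Hypothetical file checksums computed while walking the tree.
checksums = {
    "src/__init__.py": hash_str("src/__init__.py\n"),
    "src/interface.py": hash_str("src/interface.py\nimport argparse\n..."),
}
# A directory document lists its children, one per line after the header, so
# hashing the concatenated child checksums makes the directory checksum change
# whenever any file under it changes.
document = "src\n__init__.py\ninterface.py"
dir_checksum = hash_str(
    "".join(checksums[f"src/{subpath}"] for subpath in document.split("\n")[1:])
)
print(dir_checksum[:8])
```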
- subdir_checksums = "" - for subpath in document.split("\n")[1:]: - subpath = Path(ref) / subpath - if subpath in checksums: - subdir_checksums += checksums[subpath] - else: - raise RagdaemonError(f"Checksum not found for {subpath}") - checksum = hash_str(subdir_checksums) - - existing_record = len(db.get(checksum)["ids"]) > 0 - if refresh or not existing_record: - metadatas = { - "id": ref, - "type": "directory", - "ref": ref, - "checksum": checksum, - "active": False, - } - document, truncate_ratio = truncate(document, db.embedding_model) - if truncate_ratio > 0 and verbose: - print(f"Truncated {ref} by {truncate_ratio:.2%}") - add_to_db["ids"].append(checksum) - add_to_db["documents"].append(document) - add_to_db["metadatas"].append(metadatas) - checksums[path] = checksum - - if len(add_to_db["ids"]) > 0: - add_to_db = remove_add_to_db_duplicates(**add_to_db) - db.upsert(**add_to_db) - return checksums - - def files_checksum(cwd: Path, ignore_patterns: set[Path] = set()) -> str: timestamps = "" for path in get_paths_for_directory(cwd, exclude_patterns=ignore_patterns): @@ -124,44 +37,88 @@ async def annotate( self, graph: KnowledgeGraph, db: Database, refresh: bool = False ) -> KnowledgeGraph: """Build a graph of active files and directories with hierarchy edges.""" - cwd = Path(graph.graph["cwd"]) - checksums = get_active_checksums( - cwd, - db, - refresh=refresh, - verbose=self.verbose, - ignore_patterns=self.ignore_patterns, - ) - _files_checksum = files_checksum(cwd, self.ignore_patterns) - # Initialize an empty graph. We'll build it from scratch. + # Initialize a new graph from scratch with same cwd + cwd = Path(graph.graph["cwd"]) graph = KnowledgeGraph() graph.graph["cwd"] = str(cwd) - edges_to_add = set() - for path, checksum in checksums.items(): - # add db reecord - id = path.as_posix() if len(path.parts) > 0 else "ROOT" - results = db.get(checksum) - data = results["metadatas"][0] - graph.add_node(id, **data) - if id == "ROOT": - continue - # add hierarchy edges - def _link_to_cwd(_path: Path): - _parent = _path.parent.as_posix() if len(_path.parts) > 1 else "ROOT" - edges_to_add.add((_parent, _path.as_posix())) - if _parent != "ROOT": - _link_to_cwd(_path.parent) - - _link_to_cwd(path) + # Load active files/dirs and checksums + checksums = dict[Path, str]() + paths = get_paths_for_directory(cwd, exclude_patterns=self.ignore_patterns) + directories = set() + edges = set() + for path in paths: + path_str = path.as_posix() + document = get_document(path_str, cwd) + checksum = hash_str(document) + data = { + "id": path_str, + "type": "file", + "ref": path_str, + "document": document, + "checksum": checksum, + "active": False, + } + graph.add_node(path_str, **data) + checksums[path] = checksum + # Record parents & edges + _last = path + for parent in path.parents: + if len(parent.parts) == 0: + parent = Path("ROOT") + directories.add(parent) + edges.add((parent.as_posix(), _last.as_posix())) + _last = parent + + for dir in directories: + dir_str = dir.as_posix() + dir_path = dir if dir != Path("ROOT") else Path(".") + document = get_document(dir_str, cwd, type="directory") + checksum = hash_str( + "".join( + checksums[dir_path / subpath] + for subpath in document.split("\n")[1:] + ) + ) + data = { + "id": dir_str, + "type": "directory", + "ref": dir_str, + "document": document, + "checksum": checksum, + "active": False, + } + graph.add_node(dir_str, **data) + checksums[dir] = checksum - # Add directory nodes with checksums - for source, target in edges_to_add: + for source, 
target in edges: for id in (source, target): if id not in graph: raise RagdaemonError(f"Node {id} not found in graph") graph.add_edge(source, target, type="hierarchy") - graph.graph["files_checksum"] = _files_checksum + # Sync with remote DB + ids = list(set(checksums.values())) + response = db.get(ids=ids, include=["metadatas"]) + db_data = {id: data for id, data in zip(response["ids"], response["metadatas"])} + add_to_db = {"ids": [], "documents": [], "metadatas": []} + for path, checksum in checksums.items(): + if checksum in db_data: + data = db_data[checksum] + graph.nodes[path.as_posix()].update(data) + else: + data = deepcopy(graph.nodes[path.as_posix()]) + document = data.pop("document") + document, truncate_ratio = truncate(document, db.embedding_model) + if self.verbose and truncate_ratio > 0: + print(f"Truncated {path} by {truncate_ratio:.2%}") + add_to_db["ids"].append(checksum) + add_to_db["documents"].append(document) + add_to_db["metadatas"].append(data) + if len(add_to_db["ids"]) > 0: + add_to_db = remove_add_to_db_duplicates(**add_to_db) + db.add(**add_to_db) + + graph.graph["files_checksum"] = files_checksum(cwd, self.ignore_patterns) return graph diff --git a/ragdaemon/annotators/summarizer.py b/ragdaemon/annotators/summarizer.py index 66fb16a..ab467f6 100644 --- a/ragdaemon/annotators/summarizer.py +++ b/ragdaemon/annotators/summarizer.py @@ -8,7 +8,7 @@ from ragdaemon.annotators.base_annotator import Annotator from ragdaemon.context import ContextBuilder -from ragdaemon.database import Database +from ragdaemon.database import Database, remove_update_db_duplicates from ragdaemon.graph import KnowledgeGraph from ragdaemon.errors import RagdaemonError from ragdaemon.utils import DEFAULT_COMPLETION_MODEL, hash_str, semaphore, truncate @@ -70,7 +70,6 @@ def build_filetree( def get_document_and_context( node: str, graph: KnowledgeGraph, - db: Database, summary_field_id: str = "summary", model: Optional[TextModel] = None, ) -> tuple[str, str]: @@ -85,12 +84,12 @@ def get_document_and_context( if data.get("type") == "directory": document = f"Directory: {node}" else: - cb = ContextBuilder(graph, db) + cb = ContextBuilder(graph) cb.add_id(node) document = cb.render() if data.get("type") == "chunk": - cb = ContextBuilder(graph, db) + cb = ContextBuilder(graph) # Parent chunks back to the file def get_hierarchical_parents(target: str, cb: ContextBuilder): @@ -212,7 +211,6 @@ def is_complete(self, graph: KnowledgeGraph, db: Database) -> bool: document, context = get_document_and_context( node, graph, - db, summary_field_id=self.summary_field_id, model=self.model, ) @@ -225,16 +223,15 @@ async def generate_summary( self, node: str, graph: KnowledgeGraph, - db: Database, loading_bar: Optional[tqdm] = None, refresh: bool = False, ): - """Asynchronously generate summary and update graph and db""" + """Asynchronously generate summary and update graph""" if self.spice_client is None: raise RagdaemonError("Spice client not initialized") document, context = get_document_and_context( - node, graph, db, summary_field_id=self.summary_field_id, model=self.model + node, graph, summary_field_id=self.summary_field_id, model=self.model ) summary_checksum = hash_str(document + context) data = graph.nodes[node] @@ -263,14 +260,9 @@ async def generate_summary( ) summary = response.text - record = db.get(data["checksum"]) - metadatas = record["metadatas"][0] if summary != "PASS": - metadatas[self.summary_field_id] = summary data[self.summary_field_id] = summary - metadatas[self.checksum_field_id] = 
summary_checksum data[self.checksum_field_id] = summary_checksum - db.update(data["checksum"], metadatas=metadatas) if loading_bar is not None: loading_bar.update(1) @@ -279,7 +271,6 @@ async def dfs( self, node: str, graph: KnowledgeGraph, - db: Database, loading_bar: Optional[tqdm] = None, refresh: bool = False, ): @@ -291,29 +282,40 @@ async def dfs( and graph.nodes[edge[1]].get("type") in self.summarize_nodes ] if children: - tasks = [ - self.dfs(child, graph, db, loading_bar, refresh) for child in children - ] + tasks = [self.dfs(child, graph, loading_bar, refresh) for child in children] await asyncio.gather(*tasks) - await self.generate_summary(node, graph, db, loading_bar, refresh) + await self.generate_summary(node, graph, loading_bar, refresh) async def annotate( self, graph: KnowledgeGraph, db: Database, refresh: bool = False ) -> KnowledgeGraph: """Asynchronously generate or fetch summaries and add to graph/db""" + summaries = dict[str, str]() + for node, data in graph.nodes(data=True): + if data is not None and data.get("type") in self.summarize_nodes: + summaries[node] = data.get(self.checksum_field_id, "") + if self.verbose: - n = len( - [ - node - for node, data in graph.nodes(data=True) - if data is not None and data.get("type") in self.summarize_nodes - ] - ) - loading_bar = tqdm(total=n, desc="Summarizing code...") + loading_bar = tqdm(total=len(summaries), desc="Summarizing code...") else: loading_bar = None - await self.dfs("ROOT", graph, db, loading_bar, refresh) + await self.dfs("ROOT", graph, loading_bar, refresh) + + update_db = {"ids": [], "metadatas": []} + for node, summary_checksum in summaries.items(): + if graph.nodes[node].get(self.checksum_field_id) != summary_checksum: + data = graph.nodes[node] + update_db["ids"].append(data["checksum"]) + update_db["metadatas"].append( + { + self.summary_field_id: data[self.summary_field_id], + self.checksum_field_id: data[self.checksum_field_id], + } + ) + if len(update_db["ids"]) > 1: + update_db = remove_update_db_duplicates(**update_db) + db.update(**update_db) if loading_bar is not None: loading_bar.close() diff --git a/ragdaemon/context.py b/ragdaemon/context.py index 650d907..d51f982 100644 --- a/ragdaemon/context.py +++ b/ragdaemon/context.py @@ -37,16 +37,15 @@ def render_comments(comments: list[Comment]) -> str: class ContextBuilder: """Renders items from a graph into an llm-readable string.""" - def __init__(self, graph: KnowledgeGraph, db: Database, verbose: bool = False): + def __init__(self, graph: KnowledgeGraph, verbose: bool = False): self.graph = graph - self.db = db self.verbose = verbose self.context = dict[ str, dict[str, Any] ]() # {path: {lines, tags, document, diff}} def copy(self): - duplicate = ContextBuilder(self.graph, self.db, self.verbose) + duplicate = ContextBuilder(self.graph, self.verbose) duplicate.context = deepcopy(self.context) return duplicate @@ -69,20 +68,17 @@ def _add_path(self, path_str: str): """Create a new record in the context for the given path.""" document = None if path_str in self.graph: - checksum = self.graph.nodes[path_str]["checksum"] - document = self.db.get(checksum)["documents"][0] + document = self.graph.nodes[path_str]["document"] if document.endswith("[TRUNCATED]"): document = None if document is None: # Truncated or deleted try: - # Could be an ignored file, in which case load it into graph/db # TODO: Add ignored files to the graph/database cwd = Path(self.graph.graph["cwd"]) document = get_document(path_str, cwd, type="file") except FileNotFoundError: # Or 
could be deleted but have a diff document = f"{path_str}\n[DELETED]" - checksum = hash_str(document) message = { "lines": set(), "tags": set(), @@ -258,8 +254,7 @@ def render_diffs(self, ids: set[str]) -> str: git_command += f" {diff_str}" output += f"{git_command}\n" for id in sorted(ids): - checksum = self.graph.nodes[id]["checksum"] - document = self.db.get(checksum)["documents"][0] + document = self.graph.nodes[id]["document"] # TODO: Add line numbers without_git_command = "\n".join(document.split("\n")[1:]) output += without_git_command + "\n" diff --git a/ragdaemon/daemon.py b/ragdaemon/daemon.py index d0bcc57..93d0dfc 100644 --- a/ragdaemon/daemon.py +++ b/ragdaemon/daemon.py @@ -144,9 +144,7 @@ def search( return self.db.query_graph(query, self.graph, n=n, node_types=node_types) def get_document(self, filename: str) -> str: - checksum = self.graph.nodes[filename]["checksum"] - document = self.db.get(checksum)["documents"][0] - return document + return self.graph.nodes[filename]["document"] def get_context( self, @@ -157,7 +155,7 @@ def get_context( model: Model | str = DEFAULT_COMPLETION_MODEL, ) -> ContextBuilder: if context_builder is None: - context = ContextBuilder(self.graph, self.db, self.verbose) + context = ContextBuilder(self.graph, self.verbose) else: # TODO: Compare graph hashes, reconcile changes context = context_builder diff --git a/ragdaemon/database/__init__.py b/ragdaemon/database/__init__.py index dbafd62..5eb1f1b 100644 --- a/ragdaemon/database/__init__.py +++ b/ragdaemon/database/__init__.py @@ -3,9 +3,12 @@ from typing import Optional from spice import Spice -from spice.errors import SpiceError -from ragdaemon.database.chroma_database import ChromaDB, remove_add_to_db_duplicates +from ragdaemon.database.chroma_database import ( + ChromaDB, + remove_add_to_db_duplicates, + remove_update_db_duplicates, +) from ragdaemon.database.database import Database from ragdaemon.database.lite_database import LiteDB from ragdaemon.utils import mentat_dir_path diff --git a/ragdaemon/database/chroma_database.py b/ragdaemon/database/chroma_database.py index f34f44c..78bf7b4 100644 --- a/ragdaemon/database/chroma_database.py +++ b/ragdaemon/database/chroma_database.py @@ -13,12 +13,6 @@ MAX_INPUTS_PER_CALL = 2048 -if TYPE_CHECKING: - from chromadb.api.types import ( - GetResult, - Metadata, - ) - def remove_add_to_db_duplicates( ids: list[str], documents: list[str], metadatas: list[dict] @@ -34,6 +28,19 @@ def remove_add_to_db_duplicates( return output +def remove_update_db_duplicates( + ids: list[str], metadatas: list[dict] +) -> dict[str, Any]: + seen = set() + output = {"ids": [], "metadatas": []} + for id, metadata in zip(ids, metadatas): + if id not in seen: + output["ids"].append(id) + output["metadatas"].append(metadata) + seen.add(id) + return output + + class ChromaDB(Database): def __init__( self, @@ -105,39 +112,44 @@ def __call__(self, input_texts: Embeddable) -> Embeddings: ) def query(self, query: str, active_checksums: list[str]) -> list[dict]: + """ + Since we add many different versions of each file to Chroma, we can't do a + straightforward query, because it'd return multiple version of the same file. + + The best workaround I've found for this is using the 'active' flag in metadata. + The downside is that it requires 2 additional calls to the database each time: + one to set it, another to unset it. The extra time is negligible for local DBs + and hopefully not unreasonable for remote. + + There's a third "extra" call to validate the active_checksums. 
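Review note: the new `ChromaDB.query` docstring above describes the flag → query → unflag workaround. A condensed sketch of that flow against a bare chromadb collection; the `query_active` wrapper and its try/finally are illustrative additions, not the shipped implementation:

```python
def query_active(collection, query: str, active_checksums: list[str]) -> list[dict]:
    # Validate ids first so chroma doesn't warn about missing records.
    valid = collection.get(ids=active_checksums, include=[])["ids"]
    # Flag the current versions as active.
    collection.update(ids=valid, metadatas=[{"active": True} for _ in valid])
    try:
        response = collection.query(
            query_texts=query,
            where={"active": True},
            n_results=len(valid),
            include=["distances"],
        )
    finally:
        # Always unset the flag so stale 'active' records can't leak into later queries.
        collection.update(ids=valid, metadatas=[{"active": False} for _ in valid])
    return [
        {"checksum": id, "distance": distance}
        for id, distance in zip(response["ids"][0], response["distances"][0])
    ]
```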
If we don't do + this it will still function properly but it will print a lot of warnings. + """ + valid_checksums = self._collection.get(ids=active_checksums, include=[])["ids"] # Flag active records - result: GetResult = self._collection.get(active_checksums) - metadatas: Optional[list[Metadata]] = result["metadatas"] - if not metadatas or len(metadatas) == 0: - return [] - updates = {"ids": [], "metadatas": []} - for metadata in metadatas: - updates["ids"].append(metadata["checksum"]) - updates["metadatas"].append({**metadata, "active": True}) + updates = { + "ids": valid_checksums, + "metadatas": [{"active": True} for _ in valid_checksums], + } self._collection.update(**updates) # Query response = self._collection.query( query_texts=query, where={"active": True}, - n_results=len(metadatas), + n_results=len(valid_checksums), + include=["distances"], ) # Remove flags - updates["metadatas"] = [{**metadata, "active": False} for metadata in metadatas] + updates = { + "ids": valid_checksums, + "metadatas": [{"active": False} for _ in valid_checksums], + } self._collection.update(**updates) - # Parse results. Return results for the 'first query' only - if ( - response is None - or response["metadatas"] is None - or response["documents"] is None - or response["distances"] is None - ): + if response is None or response["distances"] is None: return [] - _metadatas = response["metadatas"][0] - _documents = response["documents"][0] - _distances = response["distances"][0] + # Parse results. Return results for the 'first query' only results = [ - {**m, "document": do, "distance": di} - for m, do, di in zip(_metadatas, _documents, _distances) + {"checksum": id, "distance": distance} + for id, distance in zip(response["ids"][0], response["distances"][0]) ] results = sorted(results, key=lambda x: x["distance"]) return results diff --git a/ragdaemon/database/database.py b/ragdaemon/database/database.py index 95bca7c..2b36c26 100644 --- a/ragdaemon/database/database.py +++ b/ragdaemon/database/database.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Iterable, Optional +from typing import Any, Iterable, Optional from ragdaemon.graph import KnowledgeGraph @@ -30,19 +30,24 @@ def query_graph( Chroma's default search covers all records, including inactive ones, so we manually flag the active records, query them, and then unflag them. 
""" - active_checksums = list( - { - data["checksum"] - for _, data in graph.nodes(data=True) - if data and "checksum" in data and data["type"] in node_types - } - ) - results = self.query(query, active_checksums) + checksum_index = { + data["checksum"]: node + for node, data in graph.nodes(data=True) + if data and "checksum" in data and data["type"] in node_types + } + response = self.query(query, list(checksum_index.keys())) + + # Add (local) metadata to results + results = list[dict[str, Any]]() + for result in response: + node = checksum_index[result["checksum"]] + data = graph.nodes[node] + result = {**result, **data} + results.append(result) # Add exact-match multiplier for result in results: distance = result["distance"] - # Multiply by 2 if query is in the NAME type = result["type"] if type == "file": name = Path(result["id"]).name diff --git a/ragdaemon/database/lite_database.py b/ragdaemon/database/lite_database.py index 821cf4c..ddea1e9 100644 --- a/ragdaemon/database/lite_database.py +++ b/ragdaemon/database/lite_database.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any +from typing import Any, Optional from ragdaemon.database.database import Database @@ -13,12 +13,8 @@ def __init__(self, cwd: Path, db_path: Path): def query(self, query: str, active_checksums: list[str]) -> list[dict]: response = self._collection.query(query, active_checksums) results = [ - {**data, "document": document, "distance": distance} - for data, document, distance in zip( - response["metadatas"][0], - response["documents"][0], - response["distances"][0], - ) + {"checksum": id, "distance": distance} + for id, distance in zip(response["ids"][0], response["distances"][0]) ] results = sorted(results, key=lambda x: x["distance"]) return results @@ -30,13 +26,13 @@ class LiteCollection: Matches the chroma Collection API except: - No embeddings - In-memory - - A basic hand-coded search algo + - Query returns all distances=1 """ def __init__(self): self.data = dict[str, dict[str, Any]]() # {id: {metadatas, document}} - def get(self, ids: list[str] | str) -> dict: + def get(self, ids: list[str] | str, include: Optional[list[str]] = None) -> dict: if isinstance(ids, str): ids = [ids] output = {"ids": [], "metadatas": [], "documents": []} @@ -45,6 +41,8 @@ def get(self, ids: list[str] | str) -> dict: output["ids"].append(id) output["metadatas"].append(self.data[id]["metadatas"]) output["documents"].append(self.data[id]["document"]) + if include: + output = {k: v for k, v in output.items() if k in include or k == "ids"} return output def count(self) -> int: @@ -65,12 +63,10 @@ def query(self, query: str, active_checksums: list[str]) -> dict[str, list[Any]] ] return { "ids": [[r["id"] for r in records]], - "metadatas": [[r["metadatas"] for r in records]], - "documents": [[r["document"] for r in records]], "distances": [[1] * len(records)], } - def upsert( + def add( self, ids: list[str] | str, metadatas: list[dict] | dict, diff --git a/ragdaemon/utils.py b/ragdaemon/utils.py index 0156039..1dd81b1 100644 --- a/ragdaemon/utils.py +++ b/ragdaemon/utils.py @@ -6,7 +6,7 @@ from pathlib import Path from spice import Spice -from spice.models import GPT_4_TURBO, Model, UnknownModel +from spice.models import GPT_4o_2024_05_13, Model, UnknownModel from spice.spice import get_model_from_name from ragdaemon.errors import RagdaemonError @@ -41,7 +41,7 @@ ] -DEFAULT_COMPLETION_MODEL = GPT_4_TURBO +DEFAULT_COMPLETION_MODEL = GPT_4o_2024_05_13 def hash_str(string: str) -> str: diff --git 
a/tests/annotators/test_chunker_llm.py b/tests/annotators/test_chunker_llm.py index badc8a9..5eee24a 100644 --- a/tests/annotators/test_chunker_llm.py +++ b/tests/annotators/test_chunker_llm.py @@ -37,7 +37,7 @@ def expected_chunks(): async def test_chunker_llm_edge_cases(cwd, expected_chunks): # NOTE: TO RUN THIS YOU HAVE TO COMMENT_OUT tests/conftest.py/mock_openai_api_key daemon = Daemon(cwd, annotators={"hierarchy": {}}) - chunker = ChunkerLLM(spice_client=daemon.spice_client) + chunker = ChunkerLLM(spice_client=daemon.spice_client, batch_size=10) # One example with all the edge cases (when batch_size = 10 lines): # - First batch ends mid-class, so second batch needs 'call path' @@ -45,7 +45,7 @@ async def test_chunker_llm_edge_cases(cwd, expected_chunks): # - Third batch is all inside one function, so needs to pass call forward. text = Path("tests/data/hard_to_chunk.txt").read_text() document = f"src/calculator.py\n{text}" - actual_chunks = await chunker.chunk_document(document, batch_size=10) + actual_chunks = await chunker.chunk_document(document) print(actual_chunks) diff --git a/tests/annotators/test_diff.py b/tests/annotators/test_diff.py index c86a083..bfedfd2 100644 --- a/tests/annotators/test_diff.py +++ b/tests/annotators/test_diff.py @@ -59,7 +59,7 @@ async def test_diff_render(git_history, mock_db): await daemon.update(refresh=True) # Only diffs - context = ContextBuilder(daemon.graph, daemon.db) + context = ContextBuilder(daemon.graph) context.add_diff("DEFAULT:main.py") context.add_diff("DEFAULT:src/operations.py:1-5") context.add_diff("DEFAULT:src/operations.py:8-10") diff --git a/tests/annotators/test_hierarchy.py b/tests/annotators/test_hierarchy.py index f86c257..686967a 100644 --- a/tests/annotators/test_hierarchy.py +++ b/tests/annotators/test_hierarchy.py @@ -4,30 +4,10 @@ from networkx.readwrite import json_graph import pytest -from ragdaemon.annotators.hierarchy import Hierarchy, get_active_checksums +from ragdaemon.annotators.hierarchy import Hierarchy from ragdaemon.graph import KnowledgeGraph -def test_get_active_checksums(cwd, mock_db): - checksums = get_active_checksums(cwd, mock_db) - assert isinstance(checksums, dict), "Checksums is not a dict" - assert all(isinstance(k, Path) for k in checksums), "Keys are not all Paths" - assert all( - isinstance(v, str) for v in checksums.values() - ), "Values are not all strings" - - hierarchy_graph = KnowledgeGraph.load("tests/data/hierarchy_graph.json") - expected = { - (node, data["checksum"]) - for node, data in hierarchy_graph.nodes(data=True) - if data and "checksum" in data - } - # Replace checksums "." 
with "ROOT" - checksums[Path("ROOT")] = checksums.pop(Path(".")) - actual = {(path.as_posix(), checksum) for path, checksum in checksums.items()} - assert actual == expected, "Checksums are not equal" - - def test_hierarchy_is_complete(cwd, mock_db): empty_graph = KnowledgeGraph() empty_graph.graph["cwd"] = cwd.as_posix() diff --git a/tests/annotators/test_summarizer.py b/tests/annotators/test_summarizer.py index 87c6a9c..0e97a82 100644 --- a/tests/annotators/test_summarizer.py +++ b/tests/annotators/test_summarizer.py @@ -1,6 +1,8 @@ +import json from pathlib import Path import pytest +from networkx.readwrite import json_graph from ragdaemon.annotators.summarizer import ( build_filetree, @@ -46,14 +48,13 @@ async def test_build_filetree(cwd): @pytest.mark.asyncio async def test_get_document_and_context(cwd): graph = KnowledgeGraph.load("tests/data/summarizer_graph.json") # Chunk data - db = LiteDB(cwd=cwd, db_path=Path(".")) for _, data in graph.nodes(data=True): document = get_document(data["ref"], cwd=cwd, type=data["type"]) - db._collection.upsert(ids=data["checksum"], documents=document, metadatas=data) + data["document"] = document # A chunk document, context = get_document_and_context( - "src/interface.py:parse_arguments", graph, db + "src/interface.py:parse_arguments", graph ) assert ( document @@ -107,7 +108,7 @@ async def test_get_document_and_context(cwd): ) # A file - document, context = get_document_and_context("src/interface.py", graph, db) + document, context = get_document_and_context("src/interface.py", graph) assert document.startswith("src/interface.py\n") assert ( context @@ -131,7 +132,7 @@ async def test_get_document_and_context(cwd): ) # A directory - document, context = get_document_and_context("src", graph, db) + document, context = get_document_and_context("src", graph) assert document == "Directory: src" assert ( context diff --git a/tests/data/summarizer_graph.json b/tests/data/summarizer_graph.json index 3456932..9c33771 100644 --- a/tests/data/summarizer_graph.json +++ b/tests/data/summarizer_graph.json @@ -28,7 +28,8 @@ "ref": "src/interface.py", "summary": "Parse command-line input to extract operands and an operator for arithmetic operations and display the output to the console.", "summary_checksum": "156e6b95a939cc690524c7e96448c787", - "type": "file" + "type": "file", + "document": "src/interface.py\nimport argparse\nimport re\n\n\ndef parse_arguments():\n parser = argparse.ArgumentParser(description=\"Basic Calculator\")\n parser.add_argument(\"operation\", type=str, help=\"Calculation operation\")\n args = parser.parse_args()\n\n # use re to parse symbol, nubmer before, nubmer after\n match = re.match(r\"(\\d+)(\\D)(\\d+)\", args.operation)\n if match is None:\n raise ValueError(\"Invalid operation\")\n return int(match.group(1)), match.group(2), int(match.group(3))\n\n\ndef render_response(result):\n print(result)\n" }, { "active": false, @@ -37,7 +38,8 @@ "ref": "README.md", "summary": "Describe the application's experimental purpose in testing the limits of the treesitter parser.", "summary_checksum": "f512afb951427a1494eecd927607aa42", - "type": "file" + "type": "file", + "document": "README.md\nAn unnecessarily convoluted app to test the boundaries of the treesitter parser\n" }, { "active": false, @@ -48,7 +50,8 @@ "ref": "src/__init__.py", "summary": "Establish the 'src' as a Python package to organize related modules concerning command-line based arithmetic operations, without adding any explicit functionality.", "summary_checksum": 
"207e3de4ed658542202ca6ccc3376a96", - "type": "file" + "type": "file", + "document": "src/__init__.py\n" }, { "active": false, @@ -57,7 +60,8 @@ "ref": ".gitignore", "summary": "Manage exclusions for version control by specifying files and directories that Git should ignore, while ensuring the .gitignore file itself remains tracked.", "summary_checksum": "5f3c1aebfa8418a5845a2c5ddc2b33cf", - "type": "file" + "type": "file", + "document": ".gitignore\n.ragdaemon\n**/.*\n**/__pycache__\n!.gitignore\n" }, { "active": false, @@ -93,7 +97,8 @@ "ref": "src/operations.py", "summary": "Define basic arithmetic operations including addition, subtraction, multiplication, division, and square root calculation utilizing Python's math library.", "summary_checksum": "f32593b8091a214cc0042312abb4626c", - "type": "file" + "type": "file", + "document": "src/operations.py\nimport math\n\n\ndef add(a, b):\n return a + b\n\n\ndef subtract(a, b):\n return a - b\n\n\ndef multiply(a, b):\n return a * b\n\n\ndef divide(a, b):\n return a / b\n\n\ndef sqrt(a):\n return math.sqrt(a)\n" }, { "active": false, @@ -113,7 +118,8 @@ "ref": "main.py", "summary": "Execute arithmetic operations based on command-line input and produce an output.", "summary_checksum": "23112504dd4d8d6daf28cb234eb9a7f3", - "type": "file" + "type": "file", + "document": "main.py\nfrom src.interface import parse_arguments, render_response\nfrom src.operations import add, divide, multiply, subtract\n\n\ndef main():\n a, op, b = parse_arguments()\n\n if op == \"+\":\n result = add(a, b)\n elif op == \"-\":\n result = subtract(a, b)\n elif op == \"*\":\n result = multiply(a, b)\n elif op == \"/\":\n result = divide(a, b)\n else:\n raise ValueError(\"Unsupported operation\")\n\n render_response(result)\n\n\nif __name__ == \"__main__\":\n main()\n" }, { "active": false, @@ -122,7 +128,8 @@ "ref": ".", "summary": "Execute simple arithmetic operations from command-line input and explore the capabilities of the treesitter parser, organizing the code into clear modules within the source directory.", "summary_checksum": "53a5d32f0cc62b73d0129d17ca1a64e5", - "type": "directory" + "type": "directory", + "document": ".\n.gitignore\nREADME.md\nmain.py\nsrc/__init__.py\nsrc/interface.py\nsrc/operations.py" }, { "active": false, @@ -131,7 +138,8 @@ "ref": "src", "summary": "Organize code modules for a simple arithmetic operations application. 
It includes files for initializing the package, parsing command-line input, and defining arithmetic operations.", "summary_checksum": "0d9635c66b257d6f31f165fea667e3fd", - "type": "directory" + "type": "directory", + "document": "src\n__init__.py\ninterface.py\noperations.py" }, { "active": false, @@ -140,7 +148,8 @@ "ref": "src/interface.py:1-4,15-16,19", "summary": "No action is described as the provided code only includes import statements.", "summary_checksum": "775efab314470411a3b831802154edff", - "type": "chunk" + "type": "chunk", + "document": "src/interface.py:1-4,15-16,19\nimport argparse\nimport re\n\n\n\n\n\n" }, { "active": false, @@ -149,7 +158,8 @@ "ref": "src/interface.py:5-14", "summary": "Parse command-line arguments into three components: an integer, a symbol representing a mathematical operation, and a second integer.", "summary_checksum": "eaca0178f36ef4919aad47f57682c7e7", - "type": "chunk" + "type": "chunk", + "document": "src/interface.py:5-14\ndef parse_arguments():\n parser = argparse.ArgumentParser(description=\"Basic Calculator\")\n parser.add_argument(\"operation\", type=str, help=\"Calculation operation\")\n args = parser.parse_args()\n\n # use re to parse symbol, nubmer before, nubmer after\n match = re.match(r\"(\\d+)(\\D)(\\d+)\", args.operation)\n if match is None:\n raise ValueError(\"Invalid operation\")\n return int(match.group(1)), match.group(2), int(match.group(3))\n" }, { "active": false, @@ -158,7 +168,8 @@ "ref": "src/interface.py:17-18", "summary": "Display the result of a mathematical operation to standard output.", "summary_checksum": "8d69c71cda68ed3dad02d2c6f8e31503", - "type": "chunk" + "type": "chunk", + "document": "src/interface.py:17-18\ndef render_response(result):\n print(result)\n" }, { "active": false, @@ -167,7 +178,8 @@ "ref": "src/operations.py:1-3,6-7,10-11,14-15,18-19,22", "summary": "No operation is performed in the provided code snippet as it only includes an import statement for the math library and no other executable code.", "summary_checksum": "d41598bfcfe2731338d393cd640de305", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:1-3,6-7,10-11,14-15,18-19,22\nimport math\n\n\n\n\n\n\n\n\n\n\n\n" }, { "active": false, @@ -176,7 +188,8 @@ "ref": "src/operations.py:4-5", "summary": "Define an addition function that takes two arguments and returns their sum.", "summary_checksum": "55c3d4fb8a3bb7f1e5c414d11d08ade8", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:4-5\ndef add(a, b):\n return a + b\n" }, { "active": false, @@ -185,7 +198,8 @@ "ref": "src/operations.py:8-9", "summary": "Perform subtraction by taking two inputs, 'a' and 'b', and returning the result of 'a' - 'b'.", "summary_checksum": "64eb46f64d0361a7d7a2ef7e4afaf0f5", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:8-9\ndef subtract(a, b):\n return a - b\n" }, { "active": false, @@ -194,7 +208,8 @@ "ref": "src/operations.py:12-13", "summary": "Multiply two numbers and return the result.", "summary_checksum": "a1502efd32ac3ff71c767a1a31d359ef", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:12-13\ndef multiply(a, b):\n return a * b\n" }, { "active": false, @@ -203,7 +218,8 @@ "ref": "src/operations.py:16-17", "summary": "Perform division on two numbers, `a` and `b`, and return the result.", "summary_checksum": "108512db41db799615544198f287815d", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:16-17\ndef divide(a, b):\n return a / b\n" }, { "active": false, @@ 
-212,7 +228,8 @@ "ref": "src/operations.py:20-21", "summary": "Calculate the square root of a number using the math library's sqrt function.", "summary_checksum": "bff8880d50f1e5c5d011135b8c720b43", - "type": "chunk" + "type": "chunk", + "document": "src/operations.py:20-21\ndef sqrt(a):\n return math.sqrt(a)\n" }, { "active": false, @@ -221,7 +238,8 @@ "ref": "main.py:1-4,20-24", "summary": "Execute the main function if the script is run as the main program.", "summary_checksum": "5536373063c7333d4c35c6497b3862d6", - "type": "chunk" + "type": "chunk", + "document": "main.py:1-4,20-24\nfrom src.interface import parse_arguments, render_response\nfrom src.operations import add, divide, multiply, subtract\n\n\n\n\nif __name__ == \"__main__\":\n main()\n\n" }, { "active": false, @@ -230,7 +248,8 @@ "ref": "main.py:5-19", "summary": "Parse command-line arguments for a basic arithmetic operation, perform the corresponding calculation, and print the result. If an unsupported operation is specified, raise a ValueError.", "summary_checksum": "6be54ffb7a09c09c621330f1edef8687", - "type": "chunk" + "type": "chunk", + "document": "main.py:5-19\ndef main():\n a, op, b = parse_arguments()\n\n if op == \"+\":\n result = add(a, b)\n elif op == \"-\":\n result = subtract(a, b)\n elif op == \"*\":\n result = multiply(a, b)\n elif op == \"/\":\n result = divide(a, b)\n else:\n raise ValueError(\"Unsupported operation\")\n\n render_response(result)\n" } ], "links": [ diff --git a/tests/test_comments.py b/tests/test_comments.py index 5b147ce..478c4d3 100644 --- a/tests/test_comments.py +++ b/tests/test_comments.py @@ -10,7 +10,7 @@ async def test_comment_render(git_history, mock_db): daemon = Daemon(cwd=git_history) await daemon.update(refresh=True) - context = ContextBuilder(daemon.graph, daemon.db) + context = ContextBuilder(daemon.graph) context.add_ref("src/operations.py") context.add_comment( "src/operations.py", {"comment": "What is this file for?"}, tags=["test-flag"] diff --git a/tests/test_context.py b/tests/test_context.py index 3191309..89003fc 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -8,12 +8,12 @@ from ragdaemon.utils import get_document -def test_daemon_render_context(cwd, mock_db): +def test_daemon_render_context(cwd): path_str = Path("src/interface.py").as_posix() ref = path_str # Base Chunk - context = ContextBuilder(KnowledgeGraph(), mock_db) + context = ContextBuilder(KnowledgeGraph()) context.context = { path_str: { "lines": set([1, 2, 3, 4, 15]), @@ -99,7 +99,7 @@ def test_to_refs(cwd, mock_db): ref = path_str # Setup Context - context = ContextBuilder(KnowledgeGraph(), mock_db) + context = ContextBuilder(KnowledgeGraph()) context.context = { path_str: { "lines": set([1, 2, 3, 4, 15]),