Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate away from allensdk #69

Merged
merged 7 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 137 additions & 26 deletions morphapi/api/allenmorphology.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,37 @@
import json
import logging
import os
from pathlib import Path

import numpy as np
import pandas as pd

try:
from allensdk.core.cell_types_cache import CellTypesCache
except ModuleNotFoundError:
raise ModuleNotFoundError(
"You need to install the allen sdk package to use "
'AllenMorphology: "pip install allensdk"'
)
import requests

from morphapi.morphology.morphology import Neuron
from morphapi.paths_manager import Paths
from morphapi.utils.data_io import connected_to_internet

logger = logging.getLogger(__name__)

# Maps column names of the cell metadata table (keys) to the column names
# exposed on the DataFrame returned by AllenMorphology.get_cells (values).
# get_cells keeps only the columns listed here and renames them accordingly.
# NOTE: "cell_soma_location" is not a raw metadata column; get_cells builds it
# from the "csl__x", "csl__y" and "csl__z" columns before this mapping is
# applied.
columns_of_interest = {
"cell_reporter_status": "reporter_status",
"cell_soma_location": "cell_soma_location",
"donor__species": "species",
"specimen__id": "id",
"specimen__name": "name",
"structure__layer": "structure_layer_name",
"structure__id": "structure_area_id",
"structure_parent__acronym": "structure_area_abbrev",
"line_name": "transgenic_line",
"tag__dendrite_type": "dendrite_type",
"tag__apical": "apical",
"nr__reconstruction_type": "reconstruction_type",
"donor__disease_state": "disease_state",
"donor__id": "donor_id",
"specimen__hemisphere": "structure_hemisphere",
"csl__normalized_depth": "normalized_depth",
}


class AllenMorphology(Paths):
"""Handles the download of neuronal morphology data from the
Expand All @@ -36,18 +50,9 @@ def __init__(self, *args, **kwargs):

Paths.__init__(self, *args, **kwargs)

# Create a Cache for the Cell Types Cache API
self.ctc = CellTypesCache(
manifest_file=os.path.join(
self.allen_morphology_cache, "manifest.json"
)
)

# Get a list of cell metadata for neurons with reconstructions,
# download if necessary
self.neurons = pd.DataFrame(
self.ctc.get_cells(require_reconstruction=True)
)
self.neurons = self.get_cells(require_reconstruction=True)
self.n_neurons = len(self.neurons)

if not self.n_neurons:
Expand All @@ -58,13 +63,90 @@ def __init__(self, *args, **kwargs):

self.downloaded_neurons = self.get_downloaded_neurons()

def get_cells(self, require_reconstruction: bool = True) -> pd.DataFrame:
    """
    Build the table of neuron metadata, using the cells.json file in the
    Allen morphology cache folder (created on first use, re-validated
    afterwards).

    :param require_reconstruction: if True, keep only the neurons that
        have a reconstruction type
    :return: DataFrame restricted to (and renamed after) the entries of
        columns_of_interest
    """
    cache_file = Path(self.allen_morphology_cache) / "cells.json"  # type: ignore[attr-defined]

    # Reuse the cached metadata when present, otherwise download it
    if cache_file.exists():
        table = self.check_cell_metadata(cache_file)
    else:
        table = self.fetch_all_cell_metadata(cache_file)

    # Collapse the three soma coordinates into a single list-valued column
    soma_axes = ["csl__x", "csl__y", "csl__z"]
    table["cell_soma_location"] = table[soma_axes].apply(list, axis=1)

    # Keep only the columns of interest, under their public names
    selected = list(columns_of_interest)
    table = table[selected].rename(columns=columns_of_interest)

    if not require_reconstruction:
        return table

    # Discard neurons without a reconstruction and renumber the rows
    table = table.dropna(subset=["reconstruction_type"])
    return table.reset_index(drop=True)

def fetch_all_cell_metadata(self, cells_path) -> pd.DataFrame:
    """
    Fetches the metadata for all neurons in the Allen database and saves
    it to a json file.

    The response is fully downloaded, validated and decoded *before* the
    file is opened, so a failed request never creates an empty file nor
    overwrites a previously valid one.

    :param cells_path: Path to save the metadata to
    :return: the metadata, read back from ``cells_path`` as a DataFrame
    :raises requests.exceptions.RequestException: if the request fails,
        times out or returns an HTTP error status
    """
    query = "https://api.brain-map.org/api/v2/data/query.json?criteria=model::ApiCellTypesSpecimenDetail,rma::options[num_rows$eqall]"

    try:
        r = requests.get(query, timeout=60)
        # Turn 4xx/5xx answers into exceptions instead of caching them
        r.raise_for_status()
        # Decode before touching the disk: opening the file first would
        # leave a truncated cells.json behind if decoding failed
        records = r.json()["msg"]
    except requests.exceptions.RequestException as e:
        logger.error(
            "Could not fetch the neuron metadata for the following "
            "reason: %s",
            str(e),
        )
        raise

    with open(cells_path, "w") as f:
        json.dump(records, f, indent=4)

    return pd.read_json(cells_path)

def check_cell_metadata(self, cells_path) -> pd.DataFrame:
    """
    Check if the metadata file is up-to-date and return the metadata
    as a pandas DataFrame.

    The check is best-effort: if the server cannot be reached, answers
    with an HTTP error, or returns a payload without a usable row count,
    the cached metadata is returned unchanged.

    :param cells_path: Path to the metadata file
    :return: the cached metadata, refreshed if the number of rows reported
        by the server differs from the number of cached rows
    """
    # Query for all cell types but return no rows (check for total number)
    query = "https://api.brain-map.org/api/v2/data/query.json?criteria=model::ApiCellTypesSpecimenDetail,rma::options[num_rows$eq0]"

    cells = pd.read_json(cells_path)
    try:
        r = requests.get(query, timeout=30)
        r.raise_for_status()
        # Decoding is kept inside the try block so that a non-JSON or
        # incomplete answer falls back to the cache instead of crashing
        n_cells = r.json()["total_rows"]
    except (requests.exceptions.RequestException, ValueError, KeyError) as e:
        logger.error(
            "Could not check for metadata validity for the following "
            "reason: %s",
            str(e),
        )
        return cells

    if n_cells != len(cells):
        logger.info("Updating neuron metadata")
        cells = self.fetch_all_cell_metadata(cells_path)

    return cells

def get_downloaded_neurons(self):
"""
Get's the path to files of downloaded neurons
Gets the path to files of downloaded neurons
"""
return [
os.path.join(self.allen_morphology_cache, f)
for f in os.listdir(self.allen_morphology_cache)
os.path.join(self.allen_morphology_cache, f) # type: ignore[attr-defined]
for f in os.listdir(self.allen_morphology_cache) # type: ignore[attr-defined]
if ".swc" in f
]

Expand All @@ -73,16 +155,15 @@ def build_filepath(self, neuron_id):
Build a filepath from neuron's metadata.
"""
return os.path.join(
self.allen_morphology_cache, "{}.swc".format(neuron_id)
self.allen_morphology_cache, "{}.swc".format(neuron_id) # type: ignore[attr-defined]
)

def download_neurons(self, ids, load_neurons=True, **kwargs):
"""
Download neurons and return neuron reconstructions (instances
of Neuron class)
Download neurons and return neuron reconstructions (instances
of Neuron class)

:param ids: list of integers with neurons IDs

"""
if isinstance(ids, np.ndarray):
ids = ids.tolist()
Expand All @@ -99,7 +180,7 @@ def download_neurons(self, ids, load_neurons=True, **kwargs):

# Download file
try:
self.ctc.get_reconstruction(neuron_id, file_name=neuron_file)
self.get_reconstruction(neuron_id, file_name=neuron_file)
except Exception as exc:
logger.error(
"Could not fetch the neuron %s "
Expand All @@ -120,3 +201,33 @@ def download_neurons(self, ids, load_neurons=True, **kwargs):
)

return neurons

def get_reconstruction(self, neuron_id: int, file_name: str):
    """
    Download a neuron's reconstruction from the Allen database.

    :param neuron_id: int, neuron ID
    :param file_name: str, path to save the neuron's reconstruction to
    :raises ValueError: if no reconstruction swc file is listed for the
        neuron
    :raises requests.exceptions.RequestException: if a request fails,
        times out or returns an HTTP error status
    """
    query_for_file_path = f"https://api.brain-map.org/api/v2/data/query.json?criteria=model::NeuronReconstruction,rma::criteria,[specimen_id$eq{neuron_id}],rma::include,well_known_files"

    r = requests.get(query_for_file_path, timeout=30)
    r.raise_for_status()
    reconstructions = r.json()["msg"]

    file_path = None
    # An empty result list means the neuron has no reconstruction; fall
    # through to the ValueError below rather than failing on the [0] lookup
    if reconstructions:
        for file in reconstructions[0]["well_known_files"]:
            # There are 3 types of files for each reconstructed neuron:
            # .png, .swc and marker_m.swc files. We want the plain .swc file
            if ".png" not in file["path"] and "marker" not in file["path"]:
                file_path = file["download_link"]
                break

    # Check to make sure a path to a reconstruction swc file was found
    if not file_path:
        raise ValueError(
            f"Could not find a reconstruction file for neuron {neuron_id}"
        )

    query_file = f"https://api.brain-map.org{file_path}"
    r = requests.get(query_file, timeout=60)
    # Without this check an HTTP error page would be saved as the swc file
    r.raise_for_status()
    with open(file_name, "wb") as f:
        f.write(r.content)
2 changes: 1 addition & 1 deletion morphapi/morphology/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def load_cached_neuron(self, neuron_name, _params):
}

for nn, act in loaded.items():
if len(act.points()) == 0:
if len(act.vertices) == 0:
loaded[nn] = None

return loaded
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"numpy",
"pandas",
"pyyaml>=5.3",
"requests",
"retry",
"rich",
"vedo>=2023.5.0",
Expand Down Expand Up @@ -51,7 +52,6 @@ dev = [
"ruff",
"setuptools_scm",
"pytest-sugar",
"allensdk",
]

nb = ["jupyter", "k3d"]
Expand Down
Loading