
Merge pull request mesoscale-activity-map#262 from datajoint-company/master

Minor improvement to pykilosort ingestion - update NWB export (include raw data)
hanhou authored Nov 29, 2022
2 parents 7ca29f1 + d0d0240 commit 979935c
Showing 7 changed files with 269 additions and 48 deletions.
2 changes: 1 addition & 1 deletion pipeline/ephys.py
@@ -255,7 +255,7 @@ class ClusteringMethod(dj.Lookup):
# jrclust_v3 is the version Dave uses
# jrclust_v4 is the version Susu uses

- contents = zip(['jrclust_v3', 'kilosort', 'jrclust_v4', 'kilosort2'])
+ contents = zip(['jrclust_v3', 'kilosort', 'jrclust_v4', 'kilosort2', 'pykilosort2.5'])


@schema
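For context, a minimal sketch of how the zip-wrapped contents populate this single-attribute Lookup table, one row per sorter name. The schema name and the abbreviated table definition here are assumptions for illustration, not taken from this diff:

import datajoint as dj

schema = dj.schema('tutorial_ephys')  # hypothetical schema name

@schema
class ClusteringMethod(dj.Lookup):
    definition = """  # abbreviated; attribute name/type assumed
    clustering_method: varchar(16)
    """
    # zip() over a flat list yields one-element tuples, i.e. one row each:
    # ('jrclust_v3',), ('kilosort',), ..., ('pykilosort2.5',)
    contents = zip(['jrclust_v3', 'kilosort', 'jrclust_v4',
                    'kilosort2', 'pykilosort2.5'])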
227 changes: 194 additions & 33 deletions pipeline/export/nwb.py

Large diffs are not rendered by default.

18 changes: 16 additions & 2 deletions pipeline/histology.py
@@ -115,6 +115,12 @@ class ElectrodePosition(dj.Part):
-> ElectrodeCCFPosition.ElectrodePosition
"""

+ class ElectrodePositionError(dj.Part):
+     definition = """
+     -> master
+     -> ElectrodeCCFPosition.ElectrodePositionError
+     """

@property
def key_source(self):
"""
@@ -124,7 +130,10 @@ def key_source(self):
elec_count='count(electrode)')
ccf_electrodes = ElectrodeCCFPosition.aggr(ephys.ProbeInsertion * ElectrodeCCFPosition.ElectrodePosition,
ccf_count='count(electrode)')
- return ElectrodeCCFPosition & (all_electrodes * ccf_electrodes & 'elec_count > ccf_count')
+ ccf_error_electrodes = ElectrodeCCFPosition.aggr(ephys.ProbeInsertion * ElectrodeCCFPosition.ElectrodePositionError,
+                                                  ccf_err_count='count(electrode)', keep_all_rows=True)
+ return ElectrodeCCFPosition & (all_electrodes * ccf_electrodes * ccf_error_electrodes
+                                & 'elec_count > ccf_count + ccf_err_count')

def make(self, key):
from scipy import interpolate
@@ -178,7 +187,12 @@ def make(self, key):
try:
ElectrodeCCFPosition.ElectrodePosition.insert1(filled_position, allow_direct_insert=True)
except dj.errors.IntegrityError:
- pass # skip CCF coords outside the brain
+ if not (ElectrodeCCFPosition.ElectrodePositionError & {**econfig_key,
+                                                        'electrode_group': r.electrode_group,
+                                                        'electrode': r.name,
+                                                        'ccf_label_id': ccf_label_id}):
+     ElectrodeCCFPosition.ElectrodePositionError.insert1(filled_position, allow_direct_insert=True)
+ self.ElectrodePositionError.insert1(filled_position)
else:
self.ElectrodePosition.insert1(filled_position)

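In plain terms, the new key_source keeps an insertion in the queue only while some electrodes have neither a resolved CCF position nor a recorded position error. A minimal arithmetic sketch of the restriction; the counts below are hypothetical:

# Hypothetical per-insertion counts illustrating 'elec_count > ccf_count + ccf_err_count'
elec_count = 374     # electrodes recorded for this probe insertion
ccf_count = 360      # electrodes with a resolved CCF position
ccf_err_count = 10   # electrodes recorded as position errors (outside the brain)

# keep_all_rows=True matters: insertions with zero error rows still join, with ccf_err_count = 0
needs_processing = elec_count > ccf_count + ccf_err_count
print(needs_processing)  # True -> 4 electrodes remain unaccounted for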
15 changes: 8 additions & 7 deletions pipeline/ingest/ephys.py
@@ -117,7 +117,7 @@ def _load(self, data, probe, npx_meta, rigpath, probe_insertion_exists=False, in
(units, spikes, spike_sites, spike_depths), repeat((units > 0))))

# scale amplitudes by uV/bit scaling factor (for kilosort2)
- if method in ['kilosort2']:
+ if 'kilosort' in method:
if 'qc' not in clustering_label:
bit_volts = npx_bit_volts[re.match('neuropixels (\d.0)', npx_meta.probe_model).group()]
unit_amp = unit_amp * bit_volts
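As a sketch of the scaling this branch now applies to every kilosort-family result: the uV/bit factors and the probe-model string below are illustrative assumptions, not values taken from this diff:

import re

# Assumed uV/bit factors keyed by probe generation
npx_bit_volts = {'neuropixels 1.0': 2.34375, 'neuropixels 2.0': 0.763}

probe_model = 'neuropixels 1.0 - 3B'  # hypothetical npx_meta.probe_model
method = 'pykilosort2.5'

if 'kilosort' in method:  # matches kilosort2 and pykilosort2.5 alike
    key = re.match(r'neuropixels (\d.0)', probe_model).group()  # 'neuropixels 1.0'
    unit_amp = 512.0 * npx_bit_volts[key]  # ADC bits -> microvolts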
@@ -249,7 +249,7 @@ def _load(self, data, probe, npx_meta, rigpath, probe_insertion_exists=False, in
for i, u in enumerate(unit_set):
if method in ['jrclust_v3', 'jrclust_v4']:
wf_chn_idx = 0
- elif method in ['kilosort2']:
+ elif 'kilosort' in method:
wf_chn_idx = np.where(data['ks_channel_map'] == vmax_unit_site[i])[0][0]
ib.insert1({**archive_key,
**site2electrode_map[vmax_unit_site[i]],
@@ -300,7 +300,7 @@ def _load(self, data, probe, npx_meta, rigpath, probe_insertion_exists=False, in
for i, u in enumerate(unit_set):
if method in ['jrclust_v3', 'jrclust_v4']:
wf_chn_idx = 0
- elif method in ['kilosort2']:
+ elif 'kilosort' in method:
wf_chn_idx = np.where(data['ks_channel_map'] == vmax_unit_site[i])[0][0]

ib.insert1({**skey, **insertion_key,
@@ -462,7 +462,7 @@ def ingest_units(insertion_key, data, npx_meta):
(units, spikes, spike_sites, spike_depths), repeat((units > 0))))

# scale amplitudes by uV/bit scaling factor (for kilosort2)
- if method in ['kilosort2']:
+ if 'kilosort' in method:
if 'qc' not in clustering_label:
bit_volts = npx_bit_volts[re.match('neuropixels (\d.0)', npx_meta.probe_model).group()]
unit_amp = unit_amp * bit_volts
@@ -580,7 +580,7 @@ def ingest_units(insertion_key, data, npx_meta):
for i, u in enumerate(unit_set):
if method in ['jrclust_v3', 'jrclust_v4']:
wf_chn_idx = 0
- elif method in ['kilosort2']:
+ elif 'kilosort' in method:
wf_chn_idx = np.where(data['ks_channel_map'] == vmax_unit_site[i])[0][0]

ib.insert1({**insertion_key,
@@ -793,6 +793,7 @@ def do_ephys_ingest(session_key, replace=False, probe_insertion_exists=False, in
Perform ephys-ingestion for a particular session (defined by session_key) to either
+ fresh ingest of new probe insertion and clustering results
+ archive existing clustering results and replace with new one (set 'replace=True')
+ + ingest directly into archive (set 'into_archive=True')
"""
# =========== Find Ephys Recording ============
sinfo = ((lab.WaterRestriction
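A hypothetical call pattern for the three modes listed in the docstring; the session key is made up for illustration:

session_key = {'subject_id': 440959, 'session': 1}  # hypothetical key

do_ephys_ingest(session_key)                     # fresh ingest
do_ephys_ingest(session_key, replace=True)       # archive existing results, then replace
do_ephys_ingest(session_key, into_archive=True)  # ingest directly into the archive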
@@ -835,7 +836,7 @@ def do_insert():
# do the insertion per probe for all probes
for probe_no, (f, cluster_method, npx_meta) in clustering_files.items():
insertion_key = {'subject_id': sinfo['subject_id'], 'session': sinfo['session'], 'insertion_number': probe_no}
- if probe_insertion_exists and (ephys.Unit & insertion_key):
+ if probe_insertion_exists and (ephys.Unit & insertion_key) and not into_archive:
# if probe_insertion exists and there exists also units for this insertion_key, skip over it
continue
try:
@@ -875,7 +876,7 @@ def extend_ephys_ingest(session_key):

def archive_ingested_clustering_results(key, archive_trial_spike=False):
"""
- The input-argument "key" should be at the level of ProbeInsertion or its anscestor.
+ The input-argument "key" should be at the level of ProbeInsertion or its ancestor.
1. Copy to ephys.ArchivedUnit
2. Delete ephys.Unit
6 changes: 3 additions & 3 deletions pipeline/ingest/utils/spike_sorter_loader.py
@@ -324,7 +324,7 @@ def _load_kilosort2(sinfo, ks_dir, npx_dir):
'sinfo': sinfo,
'ef_path': ks_dir,
'skey': skey,
- 'method': 'kilosort2',
+ 'method': 'pykilosort2.5' if 'pyks25' in ks_dir else 'kilosort2',
'hz': hz,
'spikes': spike_times.astype(int),
'spike_sites': ks.data['spike_sites'] + 1, # channel numbering in this pipeline is 1-based indexed
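The sorter label is now inferred from the results directory. A standalone sketch, assuming ks_dir is a string path (the layout below is hypothetical):

ks_dir = '/data/SC011/2022-11-01/1/pyks25/ks_output'  # hypothetical layout
method = 'pykilosort2.5' if 'pyks25' in ks_dir else 'kilosort2'
assert method == 'pykilosort2.5'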
@@ -934,8 +934,8 @@ def extract_curated_cluster_notes(self):

def extract_cluster_noise_label(self):
"""
- # labeling based on the noiseTemplate module - output to "cluster_group.tsv" file
- # (https://github.com/jenniferColonell/ecephys_spike_sorting/tree/master/ecephys_spike_sorting/modules/noise_templates)
+ labeling based on the noiseTemplate module - output to "cluster_group.tsv" file
+ (https://github.com/jenniferColonell/ecephys_spike_sorting/tree/master/ecephys_spike_sorting/modules/noise_templates)
"""
noise_labels = {}
cluster_group_tsv = pathlib.Path(self._kilosort_dir) / 'cluster_group.tsv'
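A minimal sketch of parsing that file, assuming the conventional two tab-separated columns (cluster_id and group) written by the noiseTemplate module; the path is a placeholder:

import csv
import pathlib

noise_labels = {}
cluster_group_tsv = pathlib.Path('/path/to/ks_dir') / 'cluster_group.tsv'
if cluster_group_tsv.exists():
    with open(cluster_group_tsv) as f:
        for row in csv.DictReader(f, delimiter='\t'):
            # e.g. {'cluster_id': '12', 'group': 'noise'}
            noise_labels[int(row['cluster_id'])] = row['group']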
7 changes: 5 additions & 2 deletions requirements.txt
@@ -15,5 +15,8 @@ openpyxl
statsmodels==0.12.2 # oralfacial_analysis.GLMFit error
astropy
statannot
- h5py==3.3.0
- pynwb==2.0.1
+ pynwb==2.2.0
+ spikeinterface
+ nwb-conversion-tools
+ nwbinspector
+ opencv-python
42 changes: 42 additions & 0 deletions scripts/delay_response_NWB_export_Nov2022.py
@@ -0,0 +1,42 @@
import os
import datajoint as dj
import pathlib

from pipeline import lab, experiment, ephys
from pipeline.export.nwb import export_recording


output_dir = pathlib.Path(r'D:/map/NWB_EXPORT/delay_response')

subjects_to_export = ("SC011", "SC013", "SC015", "SC016", "SC017",
"SC022", "SC023", "SC026", "SC027", "SC030",
"SC031", "SC032", "SC033", "SC035", "SC038",
"SC043", "SC045", "SC048", "SC049", "SC050",
"SC052", "SC053", "SC060", "SC061", "SC064",
"SC065", "SC066", "SC067")


def main(limit=None):
subject_keys = (lab.Subject * lab.WaterRestriction.proj('water_restriction_number')
& f'water_restriction_number in {subjects_to_export}').fetch('KEY')
session_keys = (experiment.Session & ephys.Unit & subject_keys).fetch('KEY', limit=limit)
export_recording(session_keys, output_dir=output_dir, overwrite=False, validate=False)


dandiset_id = os.getenv('DANDISET_ID')
dandi_api_key = os.getenv('DANDI_API_KEY')


def publish_to_dandi(dandiset_id, dandi_api_key):
from element_interface.dandi import upload_to_dandi

dandiset_dir = output_dir / 'dandi'
dandiset_dir.mkdir(parents=True, exist_ok=True)

upload_to_dandi(
data_directory=output_dir,
dandiset_id=dandiset_id,
staging=False,
working_directory=dandiset_dir,
api_key=dandi_api_key,
sync=True)
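A hypothetical way to drive the new script end to end, exporting a single session as a smoke test before publishing (the entry-point block below is not part of the committed script):

if __name__ == '__main__':
    main(limit=1)  # export one session's NWB file as a dry run
    if dandiset_id and dandi_api_key:  # publish only when credentials are set
        publish_to_dandi(dandiset_id, dandi_api_key)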
