Merge pull request #117 from Temigo/me
Include changes to clustering targets/labels
Temigo authored Jul 27, 2022
2 parents c08e204 + ecbf563 commit 5a446dd
Showing 36 changed files with 1,416 additions and 1,978 deletions.
74 changes: 73 additions & 1 deletion analysis/algorithms/calorimetry.py
@@ -1,13 +1,85 @@
 from analysis.classes.particle import Particle
 import numpy as np
 import numba as nb
 from sklearn.decomposition import PCA
 
 
 def compute_sum_deposited(particle : Particle):
     assert hasattr(particle, 'deposition')
     sum_E = particle.deposition.sum()
     return sum_E
 
 
+def compute_track_length(points, bin_size=17):
+    """
+    Compute track length by dividing it into segments
+    and computing a local PCA axis, then summing the
+    local lengths of the segments.
+    Parameters
+    ----------
+    points: np.ndarray
+        Shape (N, 3)
+    bin_size: int, optional
+        Size (in voxels) of the segments
+    Returns
+    -------
+    float
+    """
+    pca = PCA(n_components=2)
+    length = 0.
+    if len(points) >= 2:
+        coords_pca = pca.fit_transform(points)[:, 0]
+        bins = np.arange(coords_pca.min(), coords_pca.max(), bin_size)
+        # bin_inds takes values in [1, len(bins)]
+        bin_inds = np.digitize(coords_pca, bins)
+        for b_i in np.unique(bin_inds):
+            mask = bin_inds == b_i
+            if np.count_nonzero(mask) < 2: continue
+            # Repeat PCA locally for better measurement of dx
+            pca_axis = pca.fit_transform(points[mask])
+            dx = pca_axis[:, 0].max() - pca_axis[:, 0].min()
+            length += dx
+    return length
+
+
+def compute_particle_direction(p: Particle, start_segment_radius=17, vertex=None):
+    """
+    Given a Particle, compute the start direction. Within `start_segment_radius`
+    of the start point, find the PCA axis and measure the direction.
+    If no start point is found, returns (-1, -1, -1).
+    Parameters
+    ----------
+    p: Particle
+    start_segment_radius: float, optional
+    Returns
+    -------
+    np.ndarray
+        Shape (3,)
+    """
+    pca = PCA(n_components=2)
+    direction = None
+    if hasattr(p, "startpoint") and p.startpoint[0] >= 0.:
+        startpoint = p.startpoint
+        if hasattr(p, "endpoint") and vertex is not None:  # make sure we pick the one closest to the vertex
+            use_end = np.argmin([
+                np.sqrt(((vertex - p.startpoint)**2).sum()),
+                np.sqrt(((vertex - p.endpoint)**2).sum())
+            ])
+            startpoint = p.endpoint if use_end else p.startpoint
+        d = np.sqrt(((p.points - startpoint)**2).sum(axis=1))
+        if (d < start_segment_radius).sum() >= 2:
+            direction = pca.fit(p.points[d < start_segment_radius]).components_[0, :]
+    if direction is None:  # we could not find a startpoint
+        if len(p.points) >= 2:  # fall back to the PCA of all voxels
+            direction = pca.fit(p.points).components_[0, :]
+        else:
+            direction = np.array([-1, -1, -1])
+    return direction
 # TODO:
 # def proton_energy_tabular(particle: Particle):
 #     assert particle.pid == 4 # Proton
@@ -16,4 +88,4 @@ def compute_sum_deposited(particle : Particle):
 
 # def multiple_coulomb_scattering(particle: Particle):
 #     assert particle.pid == 2 # Muon
-#     pass
\ No newline at end of file
+#     pass
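
A minimal usage sketch for the two new helpers (illustrative only, not part of the commit; the synthetic track and the SimpleNamespace stand-in for a Particle are assumptions):

    import numpy as np
    from types import SimpleNamespace
    from analysis.algorithms.calorimetry import compute_track_length, compute_particle_direction

    rng = np.random.default_rng(0)
    axis = np.array([1.0, 2.0, 0.5])
    axis /= np.linalg.norm(axis)

    # 200 hits along a ~100-voxel straight track with ~1 voxel of transverse
    # smearing, offset so all coordinates stay positive
    t = rng.uniform(0.0, 100.0, size=200)
    points = 10.0 + t[:, None] * axis + rng.normal(scale=1.0, size=(200, 3))

    length = compute_track_length(points, bin_size=17)
    print(f"reconstructed length: {length:.1f} voxels (true ~100)")

    # With no vertex argument, compute_particle_direction only reads .points
    # and .startpoint, so a SimpleNamespace can stand in for a Particle here.
    p = SimpleNamespace(points=points, startpoint=points[np.argmin(t)])
    direction = compute_particle_direction(p, start_segment_radius=17)
    print("start direction:", np.round(direction, 3))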
3 changes: 2 additions & 1 deletion analysis/algorithms/selections/__init__.py
@@ -1,4 +1,5 @@
 from .stopping_muons import stopping_muons
 from .through_going_muons import through_going_muons
 from .michel_electrons import michel_electrons
-from .example_nue import debug_pid
\ No newline at end of file
+from .example_nue import debug_pid
+from .statistics import statistics
211 changes: 68 additions & 143 deletions analysis/algorithms/selections/example_nue.py
@@ -7,6 +7,7 @@
 
 from pprint import pprint
 import time
+import numpy as np
 
 
 @evaluate(['interactions', 'particles'], mode='per_batch')
@@ -18,34 +19,73 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg):

     predictor = FullChainEvaluator(data_blob, res, cfg, analysis_cfg)
     image_idxs = data_blob['index']
-    for i, index in enumerate(image_idxs):
+    for idx, index in enumerate(image_idxs):
 
         # Process Interaction Level Information
-        matches, counts = predictor.match_interactions(i,
+        matches, counts = predictor.match_interactions(idx,
                                                        mode='true_to_pred',
                                                        match_particles=True,
                                                        drop_nonprimary_particles=primaries,
                                                        return_counts=True)
 
+        matched_pred_indices = []
+        for i, interaction_pair in enumerate(matches):
+            true_int, pred_int = interaction_pair[0], interaction_pair[1]
+            if pred_int is not None:
+                matched_pred_indices.append(pred_int.id)
+
+        pred_interactions = predictor.get_interactions(idx, drop_nonprimary_particles=primaries)
+        for int in pred_interactions:
+            if int.id not in matched_pred_indices:
+                matches.append((None, int))
+                if isinstance(counts, list) or isinstance(counts, np.ndarray):
+                    counts = np.concatenate([counts, [0]])
+                else: counts = np.array([counts, 0])
+
         for i, interaction_pair in enumerate(matches):
             true_int, pred_int = interaction_pair[0], interaction_pair[1]
             true_int_dict = count_primary_particles(true_int, prefix='true')
             pred_int_dict = count_primary_particles(pred_int, prefix='pred')
-            if pred_int is None:
-                pred_int_dict['true_interaction_matched'] = False
-            else:
-                pred_int_dict['true_interaction_matched'] = True
-            true_int_dict['true_nu_id'] = true_int.nu_id
+            pred_int_dict['true_interaction_matched'] = False
+            if true_int is not None and pred_int is not None:
+                pred_int_dict['true_interaction_matched'] = True
+            # Store neutrino information
+            true_int_dict['true_nu_id'] = -1
+            true_int_dict['true_nu_interaction_type'] = -1
+            true_int_dict['true_nu_interaction_mode'] = -1
+            true_int_dict['true_nu_current_type'] = -1
+            true_int_dict['true_nu_energy'] = -1
+            if true_int is not None:
+                true_int_dict['true_nu_id'] = true_int.nu_id
+            if 'neutrino_asis' in data_blob and true_int is not None and true_int.nu_id > 0:
+                # assert 'particles_asis' in data_blob
+                # particles = data_blob['particles_asis'][i]
+                neutrinos = data_blob['neutrino_asis'][idx]
+                if len(neutrinos) > 1 or len(neutrinos) == 0: continue
+                nu = neutrinos[0]
+                # Get larcv::Particle objects for each
+                # particle of the true interaction
+                # true_particles = np.array(particles)[np.array([p.id for p in true_int.particles])]
+                # true_particles_track_ids = [p.track_id() for p in true_particles]
+                # for nu in neutrinos:
+                #     if nu.mct_index() not in true_particles_track_ids: continue
+                true_int_dict['true_nu_interaction_type'] = nu.interaction_type()
+                true_int_dict['true_nu_interaction_mode'] = nu.interaction_mode()
+                true_int_dict['true_nu_current_type'] = nu.current_type()
+                true_int_dict['true_nu_energy'] = nu.energy_init()
+
             pred_int_dict['interaction_match_counts'] = counts[i]
             interactions_dict = OrderedDict({'Index': index})
             interactions_dict.update(true_int_dict)
             interactions_dict.update(pred_int_dict)
             interactions.append(interactions_dict)
 
             # Process particle level information
-            pred_particles, true_particles = [], true_int.particles
+            pred_particles, true_particles = [], []
+            if pred_int is not None:
+                pred_particles = pred_int.particles
+            if true_int is not None:
+                true_particles = true_int.particles
             matched_particles, _, ious = match_particles_fn(true_particles,
                                                             pred_particles)
             for i, m in enumerate(matched_particles):
@@ -57,147 +97,32 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg):
                 true_particle_dict = get_particle_properties(true_p,
                                                              vertex=true_int.vertex,
                                                              prefix='true')
-                if pred_p is not None:
+                pred_particle_dict['true_particle_is_matched'] = False
+                if pred_p is not None and true_p is not None:
                     pred_particle_dict['true_particle_is_matched'] = True
-                else:
-                    pred_particle_dict['true_particle_is_matched'] = False
 
                 pred_particle_dict['particle_match_counts'] = ious[i]
 
                 true_particle_dict['true_interaction_id'] = true_int.id
                 pred_particle_dict['pred_interaction_id'] = pred_int.id
 
+                true_particle_dict['true_particle_energy_init'] = -1
+                true_particle_dict['true_particle_energy_deposit'] = -1
+                true_particle_dict['true_particle_children_count'] = -1
+                if 'particles_asis' in data_blob:
+                    particles_asis = data_blob['particles_asis'][idx]
+                    if len(particles_asis) > true_p.id:
+                        true_part = particles_asis[true_p.id]
+                        true_particle_dict['true_particle_energy_init'] = true_part.energy_init()
+                        true_particle_dict['true_particle_energy_deposit'] = true_part.energy_deposit()
+                        # If no children other than itself: particle is stopping.
+                        children = true_part.children_id()
+                        children = [x for x in children if x != true_part.id()]
+                        true_particle_dict['true_particle_children_count'] = len(children)
+
                 particles_dict.update(pred_particle_dict)
                 particles_dict.update(true_particle_dict)
 
                 particles.append(particles_dict)
 
     return [interactions, particles]



-@evaluate(['interactions_test', 'node_features', 'edge_features'], mode='per_batch')
-def test_selection(data_blob, res, data_idx, analysis_cfg, cfg):
-
-    # Set default fieldnames and values. (Needed for logger to work)
-    interactions_tp = []
-    node_df = []
-    deghosting = analysis_cfg['analysis']['deghosting']
-    primaries = analysis_cfg['analysis']['match_primaries']
-
-    predictor = FullChainEvaluator(data_blob, res, cfg, analysis_cfg, deghosting=deghosting)
-    image_idxs = data_blob['index']
-
-    # Match True interactions to Predicted interactions
-
-    for i, index in enumerate(image_idxs):
-
-        print('-------------------Index: {}---------------------'.format(index))
-
-        matches = predictor.match_interactions(i, mode='tp', match_particles=True, primaries=primaries)
-
-        for interaction_pair in matches:
-            true_int, pred_int = interaction_pair[0], interaction_pair[1]
-            if pred_int is not None:
-                pred_particles, true_particles = pred_int.particles, true_int.particles
-            else:
-                pred_particles, true_particles = [], true_int.particles
-
-            # Match true particles to predicted particles
-            matched_particles, _, _ = match(true_particles, pred_particles)
-
-            if pred_int is None:
-                print("No predicted interaction match = ", matched_particles)
-                true_count_primary_leptons = true_int.primary_particle_counts[1] \
-                    + true_int.primary_particle_counts[2]
-                true_count_primary_particles = sum(true_int.primary_particle_counts.values())
-                for p in true_int.particles:
-                    update_dict = OrderedDict({
-                        'index': index,
-                        'pred_interaction_id': -1,
-                        'true_interaction_id': true_int.id,
-                        'pred_particle_type': -1,
-                        'pred_particle_size': -1,
-                        'pred_particle_is_primary': False,
-                        'pred_particle_is_matched': False,
-                        'true_particle_type': p.pid,
-                        'true_particle_size': p.size,
-                        'true_particle_is_primary': False,
-                        'true_particle_is_matched': False,
-                        'pred_count_primary_leptons': 0,
-                        'pred_count_primary_particles': 0,
-                        'true_count_primary_leptons': true_count_primary_leptons,
-                        'true_count_primary_particles': true_count_primary_particles,
-                        'true_interaction_is_matched': False,
-                        'pred_particle_E': -1,
-                        'true_particle_E': p.sum_edep})
-                    interactions_tp.append(update_dict)
-
-            else:
-                true_count_primary_leptons = true_int.primary_particle_counts[1] \
-                    + true_int.primary_particle_counts[2]
-                true_count_primary_particles = sum(true_int.primary_particle_counts.values())
-
-                pred_count_primary_leptons = {}
-                pred_count_primary_particles = {}
-
-                update_dicts = []
-
-                for p in pred_particles:
-                    if p.is_primary:
-                        # Count matched primaries
-                        pred_count_primary_particles[p.id] = True
-                        if (p.pid == 1 or p.pid == 2):
-                            # Count matched primary leptons
-                            pred_count_primary_leptons[p.id] = True
-
-                for m in matched_particles:
-                    update_dict = OrderedDict({
-                        'index': index,
-                        'pred_interaction_id': -1,
-                        'true_interaction_id': true_int.id,
-                        'pred_particle_type': -1,
-                        'pred_particle_size': -1,
-                        'pred_particle_is_primary': False,
-                        'pred_particle_is_matched': False,
-                        'true_particle_type': -1,
-                        'true_particle_size': -1,
-                        'true_particle_is_primary': False,
-                        'true_particle_is_matched': False,
-                        'pred_count_primary_leptons': 0,
-                        'pred_count_primary_particles': 0,
-                        'true_count_primary_leptons': true_count_primary_leptons,
-                        'true_count_primary_particles': true_count_primary_particles,
-                        'true_interaction_is_matched': True,
-                        'pred_particle_E': -1,
-                        'true_particle_E': -1})
-
-                    update_dict['pred_interaction_id'] = pred_int.id
-                    update_dict['true_interaction_id'] = true_int.id
-
-                    p1, p2 = m
-
-                    if p2 is not None:
-                        p2.is_matched = True
-                        update_dict['pred_particle_type'] = p2.pid
-                        update_dict['pred_particle_size'] = p2.size
-                        update_dict['pred_particle_E'] = p2.sum_edep
-                        update_dict['pred_particle_is_primary'] = p2.is_primary
-                        update_dict['pred_particle_is_matched'] = True
-                        update_dict['true_particle_is_matched'] = True
-
-                    update_dict['true_particle_type'] = p1.pid
-                    update_dict['true_particle_size'] = p1.size
-                    update_dict['true_particle_is_primary'] = p1.is_primary
-                    update_dict['true_particle_E'] = p1.sum_edep
-
-                    update_dicts.append(update_dict)
-
-                    node_dict = OrderedDict({'node_feat_{}'.format(i) : p2.node_features[i] \
-                        for i in range(p2.node_features.shape[0])})
-
-                    node_df.append(node_dict)
-
-                for d in update_dicts:
-                    d['pred_count_primary_leptons'] = sum(pred_count_primary_leptons.values())
-                    d['pred_count_primary_particles'] = sum(pred_count_primary_particles.values())
-                    interactions_tp.append(d)
-
-    return [interactions_tp, node_df]
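
The bookkeeping that debug_pid gains above can be seen in isolation: predicted interactions with no true match are appended to matches as (None, pred) pairs, and the per-pair count array is padded with zeros so counts[i] stays aligned with matches. A standalone sketch with toy values (all names hypothetical):

    import numpy as np

    matches = [('true_0', 'pred_0'), ('true_1', None)]  # true-to-pred match pairs
    counts = np.array([42, 0])                          # overlap counts per pair

    matched_pred_ids = {p for _, p in matches if p is not None}
    for pred in ['pred_0', 'pred_1']:                   # every predicted interaction
        if pred not in matched_pred_ids:
            matches.append((None, pred))                # unmatched prediction
            counts = np.concatenate([counts, [0]])      # keep counts[i] aligned

    assert len(matches) == len(counts)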
6 changes: 6 additions & 0 deletions analysis/algorithms/selections/michel_electrons.py
@@ -80,6 +80,8 @@ def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg):
     ablation_radius = processor_cfg.get('ablation_radius', 15)
     ablation_min_samples = processor_cfg.get('ablation_min_samples', 5)
     shower_threshold = processor_cfg.get('shower_threshold', 10)
+    fiducial_threshold = processor_cfg.get('fiducial_threshold', 30)
+    muon_min_voxel_count = processor_cfg.get('muon_min_voxel_count', 30)
     matching_mode = processor_cfg.get('matching_mode', 'true_to_pred')
     #
     # ====== Initialization ======
@@ -105,6 +107,7 @@ def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg):
 
         for tp in true_particles:
             if tp.semantic_type != michel_label: continue
+            if not predictor.is_contained(tp.points, threshold=fiducial_threshold): continue
             p = data_blob['particles_asis'][i][tp.id]
 
             michel_is_attached_at_edge = False
@@ -114,6 +117,7 @@ def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg):
             for tp2 in true_particles:
                 if tp2.semantic_type != track_label: continue
                 if tp2.pid != muon_label and tp2.pid != pion_label: continue
+                if tp2.size < muon_min_voxel_count: continue
                 d = cdist(tp.points, tp2.points).min()
                 attached_at_edge, cluster_count, old_cluster_count = is_attached_at_edge(tp.points, tp2.points,
                                                                                          attached_threshold=attached_threshold,
@@ -159,11 +163,13 @@ def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg):
         # Loop over predicted particles
         for p in pred_particles:
             if p.semantic_type != michel_label: continue
+            if not predictor.is_contained(p.points, threshold=fiducial_threshold): continue
 
             # Check whether it is attached to the edge of a track
             michel_is_attached_at_edge = False
             for p2 in pred_particles:
                 if p2.semantic_type != track_label: continue
+                if p2.size < muon_min_voxel_count: continue
                 if not is_attached_at_edge(p.points, p2.points,
                                            attached_threshold=attached_threshold,
                                            one_pixel=one_pixel,
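
Both new cuts gate the true and predicted loops alike: a Michel candidate must be fiducially contained, and a candidate muon/pion track must clear a minimum voxel count. A self-contained sketch of the containment idea (the box bounds and the logic here are assumptions; the commit itself relies on the evaluator's own is_contained):

    import numpy as np

    DETECTOR_LO, DETECTOR_HI = 0.0, 768.0   # assumed voxel bounds per axis

    def is_contained(points, threshold=30):
        """True if every point lies at least `threshold` voxels inside the box."""
        points = np.asarray(points)
        return bool(((points >= DETECTOR_LO + threshold) &
                     (points <= DETECTOR_HI - threshold)).all())

    contained = np.full((10, 3), 400.0)      # cluster deep inside the volume
    edge = np.array([[5.0, 400.0, 400.0]])   # cluster leaking out of the fiducial region
    assert is_contained(contained, threshold=30)
    assert not is_contained(edge, threshold=30)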
