From 058672e20d02a5073609f9dfa4ac0a1611ec9efb Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 2 Aug 2022 15:30:58 -0700 Subject: [PATCH 01/52] Trainling spaces removed --- mlreco/models/layers/common/momentum.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mlreco/models/layers/common/momentum.py b/mlreco/models/layers/common/momentum.py index d0d1c04f..ac9d4c11 100644 --- a/mlreco/models/layers/common/momentum.py +++ b/mlreco/models/layers/common/momentum.py @@ -44,8 +44,8 @@ class VertexNet(MomentumNet): Small MLP for handling vertex regression and particle primary prediction. ''' def __init__(self, num_input, num_output=1, num_hidden=128): - super(VertexNet, self).__init__(num_input, num_output=num_output, - num_hidden=num_hidden, + super(VertexNet, self).__init__(num_input, num_output=num_output, + num_hidden=num_hidden, positive_outputs=False) def forward(self, x): # if x.shape[0] > 1: @@ -72,7 +72,7 @@ class DeepVertexNet(nn.Module): node_y = torch.randn(16, 5) edge_feature_x2y = net(node_x, node_y) # (16, 5) ''' - def __init__(self, num_input, num_output=1, num_hidden=512, num_layers=5, + def __init__(self, num_input, num_output=1, num_hidden=512, num_layers=5, positive_outputs=False): super(DeepVertexNet, self).__init__() self.linear = nn.ModuleList() @@ -104,7 +104,7 @@ def forward(self, x): class EvidentialMomentumNet(nn.Module): - def __init__(self, num_input, num_output=4, + def __init__(self, num_input, num_output=4, num_hidden=128, eps=0.0, logspace=False): super(EvidentialMomentumNet, self).__init__() self.linear1 = nn.Linear(num_input, num_hidden) @@ -137,10 +137,10 @@ def forward(self, x): vab = self.softplus(x[:, :3]) + self.eps alpha = torch.clamp(vab[:, 1] + 1.0, min=1.0).view(-1, 1) gamma = 2.0 * self.gamma(x[:, 3]).view(-1, 1) - out = torch.cat([gamma, vab[:, 0].view(-1, 1), + out = torch.cat([gamma, vab[:, 0].view(-1, 1), alpha, vab[:, 2].view(-1, 1)], dim=1) if not self.logspace: evidence = torch.clamp(out, min=self.eps) else: - evidence = out - return evidence \ No newline at end of file + evidence = out + return evidence From fbb299e2a48e2b88766da3841d5720a9da94cb2b Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 2 Aug 2022 17:10:09 -0700 Subject: [PATCH 02/52] Handle empty edge_index case gracefully in the edge_assignment_score function --- mlreco/utils/gnn/evaluation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mlreco/utils/gnn/evaluation.py b/mlreco/utils/gnn/evaluation.py index eb0b0346..978d4f34 100644 --- a/mlreco/utils/gnn/evaluation.py +++ b/mlreco/utils/gnn/evaluation.py @@ -199,7 +199,7 @@ def grouping_loss(pred_mat: nb.float32[:], @nb.njit(cache=True) def edge_assignment_score(edge_index: nb.int64[:,:], edge_scores: nb.float32[:,:], - n: nb.int64) -> (nb.int64[:,:], nb.float32): + n: nb.int64) -> (nb.int64[:,:], nb.int64[:], nb.float32): """ Function that finds the graph that produces the lowest grouping score iteratively adding the most likely edges, @@ -208,11 +208,16 @@ def edge_assignment_score(edge_index: nb.int64[:,:], Args: edge_index (np.ndarray) : (E,2) Incidence matrix edge_scores (np.ndarray): (E,2) Two-channel edge score - n (int) : Total number of clusters C + n (int) : Total number of clusters C Returns: np.ndarray: (E',2) Optimal incidence matrix + np.ndarray: (C) Optimal group ID for each node float : Score for the optimal incidence matrix """ + # If there is no edge, do not bother + if not len(edge_index): + return np.empty((2,0), 
dtype=np.int64), np.zeros(n, dtype=np.int64), 0. + # Build an input adjacency matrix to constrain the edge selection to the input graph adj_mat = adjacency_matrix(edge_index, n) From 113dec6435afab1be2241c4e5f9ed755e32092e1 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 2 Aug 2022 23:40:42 -0700 Subject: [PATCH 03/52] Typing bug fix for images with no particles in cluster parser --- mlreco/iotools/parsers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py index cb2dab54..d139f301 100644 --- a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -146,7 +146,7 @@ def parse_cluster3d(cluster_event, labels['vtx_x'] = np.array([p.ancestor_position().x() for p in particles_asis_v]) labels['vtx_y'] = np.array([p.ancestor_position().y() for p in particles_asis_v]) labels['vtx_z'] = np.array([p.ancestor_position().z() for p in particles_asis_v]) - labels['primary_group'] = np.array((nu_ids > 0) & np.array([p.group_id()==p.parent_id() for p in particles_v]), dtype=np.float32) + labels['primary_group'] = np.array((nu_ids > 0) & np.array([p.group_id()==p.parent_id() for p in particles_v], dtype=bool), dtype=np.float32) labels['sem'] = np.array([p.shape() for p in particles_v]) # Loop over clusters, store info From d1a410580c8c439af63efb2f810dc5971a0afe9b Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Fri, 5 Aug 2022 09:44:18 -0700 Subject: [PATCH 04/52] Fix segfault related to empty input to UResNet in the full chian --- mlreco/models/full_chain.py | 4 ++-- mlreco/models/uresnet.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mlreco/models/full_chain.py b/mlreco/models/full_chain.py index bb523231..85576788 100644 --- a/mlreco/models/full_chain.py +++ b/mlreco/models/full_chain.py @@ -213,7 +213,7 @@ def full_chain_cnn(self, input): input[0][deghost, 4] = charges result.update({'input_rescaled':[input[0][deghost,:5]]}) if self.enable_uresnet: - if self.enable_charge_rescaling: + if self.enable_charge_rescaling and deghost.sum() > 0: assert not self.uresnet_lonely.ghost result.update(self.uresnet_lonely([input[0][deghost, :4+self.input_features]])) else: @@ -232,7 +232,7 @@ def full_chain_cnn(self, input): ppn_input['decoderTensors'][0]) result.update(ppn_output) - if self.enable_charge_rescaling: + if self.enable_charge_rescaling and deghost.sum() > 0: # Reshape output tensors of UResNet and PPN to be of the original shape for key in ['segmentation', 'points', 'classify_endpoints', 'mask_ppn', 'ppn_coords', 'ppn_layers']: res = result[key][0] if isinstance(result[key][0], torch.Tensor) else result[key][0][-1] diff --git a/mlreco/models/uresnet.py b/mlreco/models/uresnet.py index c103f5cc..337ecc43 100644 --- a/mlreco/models/uresnet.py +++ b/mlreco/models/uresnet.py @@ -282,19 +282,19 @@ def forward(self, result, label, weights=None): if self._ghost: results = { - 'accuracy': uresnet_acc/count, - 'loss': (self._alpha * uresnet_loss + self._beta * mask_loss)/count, - 'ghost_mask_acc': mask_acc / count, - 'ghost_mask_loss': self._beta * mask_loss / count, - 'uresnet_loss': self._alpha * uresnet_loss / count, - 'uresnet_acc': uresnet_acc / count, - 'ghost2ghost': ghost2ghost / count, - 'nonghost2nonghost': nonghost2nonghost / count + 'accuracy': uresnet_acc/count if count else 0., + 'loss': (self._alpha * uresnet_loss + self._beta * mask_loss)/count if count else (self._alpha * uresnet_loss + self._beta * mask_loss), + 
'ghost_mask_acc': mask_acc / count if count else 0., + 'ghost_mask_loss': self._beta * mask_loss / count if count else self._beta * mask_los, + 'uresnet_loss': self._alpha * uresnet_loss / count if count else self._alpha * uresnet_loss, + 'uresnet_acc': uresnet_acc / count if count else 0., + 'ghost2ghost': ghost2ghost / count if count else 0., + 'nonghost2nonghost': nonghost2nonghost / count if count else 0. } else: results = { - 'accuracy': uresnet_acc/count, - 'loss': uresnet_loss/count + 'accuracy': uresnet_acc/count if count else 0., + 'loss': uresnet_loss/count if count else uresnet_loss } for c in range(self._num_classes): if count_class[c] > 0: From e356ec3aa37053cb13571d3d4fbdc89222030f44 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 11 Aug 2022 00:33:54 -0700 Subject: [PATCH 05/52] Fixed segfault in edge_assignment_score when the edge_index is empty --- mlreco/models/full_chain.py | 4 +++- mlreco/utils/gnn/data.py | 2 +- mlreco/utils/gnn/evaluation.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mlreco/models/full_chain.py b/mlreco/models/full_chain.py index 85576788..e1334d32 100644 --- a/mlreco/models/full_chain.py +++ b/mlreco/models/full_chain.py @@ -235,6 +235,7 @@ def full_chain_cnn(self, input): if self.enable_charge_rescaling and deghost.sum() > 0: # Reshape output tensors of UResNet and PPN to be of the original shape for key in ['segmentation', 'points', 'classify_endpoints', 'mask_ppn', 'ppn_coords', 'ppn_layers']: + if key not in result: continue res = result[key][0] if isinstance(result[key][0], torch.Tensor) else result[key][0][-1] tensor = torch.zeros((input[0].shape[0], res.shape[1]), dtype=res.dtype, device=res.device) tensor[deghost] = res @@ -242,7 +243,8 @@ def full_chain_cnn(self, input): result[key][0] = tensor else: result[key][0][-1] = tensor - result['ppn_output_coordinates'][0] = input[0][:,:4].type(result['ppn_output_coordinates'][0].dtype) + if 'ppn_output_coordinates' in result: + result['ppn_output_coordinates'][0] = input[0][:,:4].type(result['ppn_output_coordinates'][0].dtype) # The rest of the chain only needs 1 input feature if self.input_features > 1: diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index e71982c3..c8b2d8f2 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -304,7 +304,7 @@ def split_edge_index(edge_index: nb.int64[:,:], """ # If the input is empty, simply return defaults if not edge_index.shape[1]: - return [np.empty((2,0), dtype=np.int64) for b in batches], [np.empty(0, dtype=np.int64) for b in batches] + return [np.empty((0,2), dtype=np.int64) for b in batches], [np.empty(0, dtype=np.int64) for b in batches] # For each batch ID, get the list of edges that belong to it ebids = [np.where(batch_ids[edge_index[0]] == b)[0] for b in batches] diff --git a/mlreco/utils/gnn/evaluation.py b/mlreco/utils/gnn/evaluation.py index 978d4f34..fca4327a 100644 --- a/mlreco/utils/gnn/evaluation.py +++ b/mlreco/utils/gnn/evaluation.py @@ -216,7 +216,7 @@ def edge_assignment_score(edge_index: nb.int64[:,:], """ # If there is no edge, do not bother if not len(edge_index): - return np.empty((2,0), dtype=np.int64), np.zeros(n, dtype=np.int64), 0. + return np.empty((0,2), dtype=np.int64), np.zeros(n, dtype=np.int64), 0. 
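# [Illustrative sketch added for context; not part of the original patch.]
# Why the empty default is shaped (0, 2) rather than (2, 0): the function's documented
# return value is an (E', 2) incidence matrix and len() counts rows, so only the
# (0, 2) form reads as "zero edges" to a check like `if not len(edge_index)`.
# Minimal standalone check, assuming only numpy:
import numpy as np
assert len(np.empty((0, 2), dtype=np.int64)) == 0   # zero edges, as intended
assert len(np.empty((2, 0), dtype=np.int64)) == 2   # would read as two edge rows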
# Build an input adjacency matrix to constrain the edge selection to the input graph adj_mat = adjacency_matrix(edge_index, n) From 5b01f00fe6fc6e8fad341bb2e4d2306dd79f9945 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 24 Aug 2022 15:43:51 -0700 Subject: [PATCH 06/52] Heuristic patch for shower start point prediction --- mlreco/utils/gnn/data.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index e71982c3..8166a6d1 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -209,15 +209,13 @@ def _get_extra_gnn_features(fragments, end_points = get_track_endpoints_geo(input[0], f, points_tensor) ppn_points = torch.cat((ppn_points, end_points.reshape(1,-1)), dim=0) else: - dmask = torch.nonzero(torch.max( - torch.abs(points_tensor[f,:3]), dim=1).values < 1., - as_tuple=True)[0] + scores = torch.softmax(points_tensor[f, -2:], dim=1)[:,-1] # scores = torch.sigmoid(points_tensor[f, -1]) - # argmax = dmask[torch.argmax(scores[dmask])] \ - # if len(dmask) else torch.argmax(scores) - scores = torch.softmax(points_tensor[f, -2:], dim=1) - argmax = dmask[torch.argmax(scores[dmask, -1])] \ - if len(dmask) else torch.argmax(scores[:, -1]) + dmask = torch.nonzero((scores > 0.5) & (torch.max( + torch.abs(points_tensor[f,:3]), dim=1).values < 1.), + as_tuple=True)[0] + argmax = dmask[torch.argmax(scores[dmask])] \ + if len(dmask) else torch.argmax(scores) start = input[0][f][argmax,1:4] + \ points_tensor[f][argmax,:3] + 0.5 ppn_points = torch.cat((ppn_points, @@ -261,7 +259,7 @@ def split_clusts(clusts, batch_ids, batches, counts): [np.ndarray] : (B) List of cluster IDs in each batch """ clusts_split, cbids = _split_clusts(clusts, batch_ids, batches, counts) - + # Cast the list of clusters to np.array (object type) same_length = [np.all([len(c) == len(bclusts[0]) for c in bclusts]) for bclusts in clusts_split] return [np.array(clusts_split[b], dtype=object if not sl else np.int64) for b, sl in enumerate(same_length)], cbids From a0cc9ba7d5302de2b030703074922fe842cbf285 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 25 Aug 2022 15:49:20 -0700 Subject: [PATCH 07/52] Changed default GrapPA accuracy from 0 to 1 (e.g. 
no node -> accuracy of 1) --- .../models/layers/gnn/losses/edge_channel.py | 12 +--- .../layers/gnn/losses/node_kinematics.py | 63 +++++-------------- .../models/layers/gnn/losses/node_primary.py | 12 +--- mlreco/models/layers/gnn/losses/node_type.py | 12 +--- 4 files changed, 22 insertions(+), 77 deletions(-) diff --git a/mlreco/models/layers/gnn/losses/edge_channel.py b/mlreco/models/layers/gnn/losses/edge_channel.py index 713f0da8..12b587d0 100644 --- a/mlreco/models/layers/gnn/losses/edge_channel.py +++ b/mlreco/models/layers/gnn/losses/edge_channel.py @@ -172,16 +172,8 @@ def forward(self, out, clusters, graph=None): # Increment the number of edges n_edges += len(edge_pred) - # Handle the case where no cluster/edge were found - if not n_edges: - return { - 'accuracy': 0., - 'loss': torch.tensor(0., requires_grad=True, device=clusters[0].device), - 'n_edges': n_edges - } - return { - 'accuracy': total_acc/n_edges, - 'loss': total_loss/n_edges, + 'accuracy': total_acc/n_edges if n_edges else 1., + 'loss': total_loss/n_edges if n_edges else torch.tensor(0., requires_grad=True, device=clusters[0].device), 'n_edges': n_edges } diff --git a/mlreco/models/layers/gnn/losses/node_kinematics.py b/mlreco/models/layers/gnn/losses/node_kinematics.py index cc8f2e5a..23afe964 100644 --- a/mlreco/models/layers/gnn/losses/node_kinematics.py +++ b/mlreco/models/layers/gnn/losses/node_kinematics.py @@ -146,8 +146,7 @@ def forward(self, out, types): compute_momentum = False compute_vtx = 'node_pred_vtx' in out - anchors_list = [] - vertex_labels = [] + vtx_anchors, vtx_labels = [], [] for i in range(len(types)): @@ -269,7 +268,7 @@ def forward(self, out, types): vtx_label = torch.tensor(node_assn_vtx[valid_mask_vtx][pos_mask_vtx], dtype=node_pred_vtx.dtype, device=node_pred_vtx.device) if self.normalize_vtx_label: # If requested, bring vertex labels in the range [0,1 ] vtx_label = vtx_label/self.spatial_size - vertex_labels.append(vtx_label.detach().cpu().numpy()) + vtx_labels.append(vtx_label.detach().cpu().numpy()) vtx_pred = node_pred_vtx[pos_mask_vtx,:3] if self.use_anchor_points: # If requested, predict positions with respect to anchor points (end points of particles) @@ -278,7 +277,7 @@ def forward(self, out, types): min_dist = torch.argmin(dist_to_anchor, dim=1) range_index = torch.arange(end_points.shape[0]).to(device=end_points.device).long() anchors = end_points[range_index, min_dist, :] - anchors_list.append(anchors.detach().cpu().numpy()) + vtx_anchors.append(anchors.detach().cpu().numpy()) vtx_pred = vtx_pred + anchors loss2 = torch.mean(torch.clamp(torch.sum(self.vtx_position_loss(vtx_pred, vtx_label), dim=1), @@ -293,7 +292,7 @@ def forward(self, out, types): n_clusts_vtx += len(valid_mask_vtx) n_clusts_vtx_pos += len(pos_mask_vtx) else: - vertex_labels.append(np.empty((0,3))) + vtx_labels.append(np.empty((0,3))) if self.use_anchor_points: anchors.append(np.empty((0,3))) # Compute the accuracy of assignment (fraction of correctly assigned nodes) @@ -310,63 +309,33 @@ def forward(self, out, types): n_clusts = n_clusts_type + n_clusts_momentum + n_clusts_vtx + n_clusts_vtx_pos - # Handle the case where no cluster/edge were found - if not n_clusts: - result = { - 'accuracy': 0., - 'loss': torch.tensor(0., requires_grad=True, device=types[0].device if len(types) and torch.is_tensor(types[0]) else 'cpu', dtype=torch.float), - 'n_clusts_momentum': n_clusts_momentum, - 'n_clusts_type': n_clusts_type, - 'n_clusts_vtx': n_clusts_vtx, - 'n_clusts_vtx_positives': n_clusts_vtx_pos - } - if 
compute_type: - result.update({ - 'type_loss': 0., - 'type_accuracy': 0., - }) - if compute_momentum: - result.update({ - 'p_loss': 0., - 'p_accuracy': 0., - }) - if compute_vtx: - result.update({ - 'vtx_position_loss': 0., - 'vtx_score_loss': 0., - 'vtx_position_acc': 0., - 'vtx_score_acc': 0., - }) - return result - result = { - 'accuracy': (type_acc+p_acc+vtx_position_acc+vtx_score_acc)/n_clusts, - 'loss': total_loss/n_clusts, + 'accuracy': (type_acc + p_acc + vtx_position_acc + vtx_score_acc)/n_clusts if n_clusts else 1., + 'loss': total_loss/n_clusts if n_clusts else torch.tensor(0., requires_grad=True, device=types[0].device, dtype=torch.float), 'n_clusts_momentum': n_clusts_momentum, 'n_clusts_type': n_clusts_type, 'n_clusts_vtx': n_clusts_vtx, 'n_clusts_vtx_positives': n_clusts_vtx_pos } - result['anchors'] = anchors_list - result['vertex_labels'] = vertex_labels - if compute_type: result.update({ - 'type_accuracy': 0. if not n_clusts_type else type_acc/n_clusts_type, - 'type_loss': 0. if not n_clusts_type else type_loss/n_clusts_type, + 'type_accuracy': type_acc/n_clusts_type if n_clusts_type else 1., + 'type_loss': type_loss/n_clusts_type if n_clusts_type else 0. }) if compute_momentum: result.update({ - 'p_accuracy': 0. if not n_clusts_momentum else p_acc/n_clusts_momentum, - 'p_loss': 0. if not n_clusts_momentum else p_loss/n_clusts_momentum, + 'p_accuracy': p_acc/n_clusts_momentum if n_clusts_momentum else 1., + 'p_loss': p_loss/n_clusts_momentum if p_loss else 0. }) if compute_vtx: result.update({ - 'vtx_score_loss': 0. if not n_clusts_vtx else vtx_score_loss/n_clusts_vtx, - 'vtx_score_acc': 0. if not n_clusts_vtx else vtx_score_acc/n_clusts_vtx, - 'vtx_position_loss': 0. if not n_clusts_vtx_pos else vtx_position_loss/n_clusts_vtx_pos, - 'vtx_position_acc': 0. if not n_clusts_vtx_pos else vtx_position_acc/n_clusts_vtx_pos, + 'vtx_anchors': vtx_anchors, + 'vtx_labels': vtx_labels, + 'vtx_score_loss': vtx_score_loss/n_clusts_vtx if n_clusts_vtx else 0., + 'vtx_score_acc': vtx_score_acc/n_clusts_vtx if n_clusts_vtx else 1., + 'vtx_position_loss': vtx_position_loss/n_clusts_vtx_pos if n_clusts_vtx_pos else 0., + 'vtx_position_acc': vtx_position_acc/n_clusts_vtx_pos if n_clusts_vtx_pos else 1. 
}) return result diff --git a/mlreco/models/layers/gnn/losses/node_primary.py b/mlreco/models/layers/gnn/losses/node_primary.py index 8629c242..73d91520 100644 --- a/mlreco/models/layers/gnn/losses/node_primary.py +++ b/mlreco/models/layers/gnn/losses/node_primary.py @@ -127,16 +127,8 @@ def forward(self, out, clusters): # Increment the number of nodes n_clusts += len(clusts) - # Handle the case where no cluster/edge were found - if not n_clusts: - return { - 'accuracy': 0., - 'loss': torch.tensor(0., requires_grad=True, device=clusters[0].device), - 'n_clusts': n_clusts - } - return { - 'accuracy': total_acc/n_clusts, - 'loss': total_loss/n_clusts, + 'accuracy': total_acc/n_clusts if n_clusts else 1., + 'loss': total_loss/n_clusts if n_clusts else torch.tensor(0., requires_grad=True, device=clusters[0].device), 'n_clusts': n_clusts } diff --git a/mlreco/models/layers/gnn/losses/node_type.py b/mlreco/models/layers/gnn/losses/node_type.py index b3554da5..a6839c68 100644 --- a/mlreco/models/layers/gnn/losses/node_type.py +++ b/mlreco/models/layers/gnn/losses/node_type.py @@ -105,16 +105,8 @@ def forward(self, out, types): # Increment the number of nodes n_clusts += len(node_mask) - # Handle the case where no cluster/edge were found - if not n_clusts: - return { - 'accuracy': 0., - 'loss': torch.tensor(0., requires_grad=True, device=types[0].device), - 'n_clusts': n_clusts - } - return { - 'accuracy': total_acc/n_clusts, - 'loss': total_loss/n_clusts, + 'accuracy': total_acc/n_clusts if n_clusts else 1., + 'loss': total_loss/n_clusts if n_clusts else torch.tensor(0., requires_grad=True, device=types[0].device), 'n_clusts': n_clusts } From 24d67ff6e68939594c0c2f82a1060ab549c42d85 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 1 Sep 2022 13:22:14 -0700 Subject: [PATCH 08/52] Fixed bug in label end points used in the standalone GrapPA model --- mlreco/utils/gnn/cluster.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlreco/utils/gnn/cluster.py b/mlreco/utils/gnn/cluster.py index 46a44a84..7136f14d 100644 --- a/mlreco/utils/gnn/cluster.py +++ b/mlreco/utils/gnn/cluster.py @@ -373,12 +373,12 @@ def _get_cluster_points_label(data: nb.float64[:,:], # Get start and end points (one and the same for all but track class) batch_ids = _get_cluster_batch(data, clusts) points = np.empty((len(clusts), 6), dtype=data.dtype) - for i, c in enumerate(clusts): # Here clusters are groups + for i, c in enumerate(clusts): batch_mask = np.where(particles[:,batch_col] == batch_ids[i])[0] clust_ids = np.unique(data[c, 5]).astype(np.int64) minid = np.argmin(particles[batch_mask][clust_ids,-2]) # Pick the first cluster in time - order = np.array([0, 1, 2, 4, 5, 6]) if (np.random.choice(2) or not random_order) else np.array([4, 5, 6, 0, 1, 2]) - points[i] = particles[batch_mask][clust_ids[minid]][order] + order = np.arange(6) if (np.random.choice(2) or not random_order) else np.array([3, 4, 5, 0, 1, 2]) + points[i] = particles[batch_mask][clust_ids[minid]][order+1] # The first column is the batch ID # Bring the start points to the closest point in the corresponding cluster for i, c in enumerate(clusts): From 382e529b808de5b166cbc9c06f1482fe1b5febd7 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 1 Sep 2022 23:29:11 -0700 Subject: [PATCH 09/52] Include MPR primaries into the primary particle target --- mlreco/iotools/parsers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py 
index d139f301..5f852e82 100644 --- a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -146,7 +146,7 @@ def parse_cluster3d(cluster_event, labels['vtx_x'] = np.array([p.ancestor_position().x() for p in particles_asis_v]) labels['vtx_y'] = np.array([p.ancestor_position().y() for p in particles_asis_v]) labels['vtx_z'] = np.array([p.ancestor_position().z() for p in particles_asis_v]) - labels['primary_group'] = np.array((nu_ids > 0) & np.array([p.group_id()==p.parent_id() for p in particles_v], dtype=bool), dtype=np.float32) + labels['primary_group'] = np.array([p.group_id()==p.parent_id() for p in particles_v], dtype=np.float32) labels['sem'] = np.array([p.shape() for p in particles_v]) # Loop over clusters, store info From d9a4e6dc75d35ddf4a78f4108ce8ef4a077df32f Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 8 Sep 2022 14:18:53 -0700 Subject: [PATCH 10/52] Fix in docstring of neutrino parser --- mlreco/iotools/parsers/particles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/iotools/parsers/particles.py b/mlreco/iotools/parsers/particles.py index 32934e0d..b29a7203 100644 --- a/mlreco/iotools/parsers/particles.py +++ b/mlreco/iotools/parsers/particles.py @@ -56,7 +56,7 @@ def parse_neutrino_asis(neutrino_event, cluster_event): schema: neutrino_asis: parser: parse_neutrino_asis - particle_asis: + args: neutrino_event: neutrino_mpv cluster_event: cluster3d_pcluster From 3aa4ec998de73aa4f3a0ae657d8e06c4eac3b669 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Fri, 9 Sep 2022 10:34:02 -0700 Subject: [PATCH 11/52] Include more details in UResNet+PPN log, harmonize PPN metric output --- mlreco/models/layers/common/gnn_full_chain.py | 6 +++--- mlreco/models/layers/common/ppnplus.py | 8 ++++---- mlreco/models/uresnet_ppn_chain.py | 10 ++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 82f05ff1..a9aa8204 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -669,8 +669,8 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics for key in res_ppn: res['ppn_' + key] = res_ppn[key] - accuracy += res_ppn['ppn_acc'] - loss += self.ppn_weight*res_ppn['ppn_loss'] + accuracy += res_ppn['accuracy'] + loss += self.ppn_weight*res_ppn['loss'] if self.enable_ghost and (self.enable_cnn_clust or \ self.enable_gnn_track or \ @@ -895,7 +895,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics if self.enable_uresnet: print('Segmentation Accuracy: {:.4f}'.format(res_seg['accuracy'])) if self.enable_ppn: - print('PPN Accuracy: {:.4f}'.format(res_ppn['ppn_acc'])) + print('PPN Accuracy: {:.4f}'.format(res_ppn['accuracy'])) if self.enable_cnn_clust and ('graph' in out or 'embeddings' in out): if not self._enable_graph_spice: print('Clustering Embedding Accuracy: {:.4f}'.format(res_cnn_clust['accuracy'])) diff --git a/mlreco/models/layers/common/ppnplus.py b/mlreco/models/layers/common/ppnplus.py index b07c3f07..90bc06fc 100644 --- a/mlreco/models/layers/common/ppnplus.py +++ b/mlreco/models/layers/common/ppnplus.py @@ -436,7 +436,7 @@ def forward(self, result, segment_label, particles_label): 'mask_loss': 0., 'type_loss': 0., 'classify_endpoints_loss': 0., - 'classify_endpoints_acc': 0. + 'classify_endpoints_accuracy': 0. 
} # Semantic Segmentation Loss for igpu in range(len(segment_label)): @@ -560,7 +560,7 @@ def forward(self, result, segment_label, particles_label): acc_classify_endpoints = acc_point_class / point_class_count #total_loss += loss_classify_endpoints.float() res['classify_endpoints_loss'] += float(loss_classify_endpoints) / num_batches - res['classify_endpoints_acc'] += float(acc_classify_endpoints) / num_batches + res['classify_endpoints_accuracy'] += float(acc_classify_endpoints) / num_batches # --- end of Endpoint classification # Distance Loss @@ -579,6 +579,6 @@ def forward(self, result, segment_label, particles_label): total_loss += loss_gpu total_acc /= num_batches - res['ppn_loss'] = total_loss - res['ppn_acc'] = float(total_acc) + res['loss'] = total_loss + res['accuracy'] = float(total_acc) return res diff --git a/mlreco/models/uresnet_ppn_chain.py b/mlreco/models/uresnet_ppn_chain.py index 3ee0f616..9f8f24d0 100644 --- a/mlreco/models/uresnet_ppn_chain.py +++ b/mlreco/models/uresnet_ppn_chain.py @@ -142,9 +142,11 @@ def forward(self, outputs, segment_label, particles_label, weights=None): outputs, segment_label, particles_label) res = { - 'loss': res_segmentation['loss'] + res_ppn['ppn_loss'], - 'accuracy': (res_segmentation['accuracy'] + res_ppn['ppn_acc']) / 2.0, - 'reg_loss': res_ppn['reg_loss'], - 'type_loss': res_ppn['type_loss'] + 'loss': res_segmentation['loss'] + res_ppn['loss'], + 'accuracy': (res_segmentation['accuracy'] + res_ppn['accuracy'])/2 } + + res.update({'segmentation_'+k:v for k, v in res_segmentation.items()}) + res.update({'ppn_'+k:v for k, v in res_ppn.items()}) + return res From 5e8e490ebf44751d469009240d73ea46b099f919 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Fri, 9 Sep 2022 10:44:05 -0700 Subject: [PATCH 12/52] More semantic harmonization in the full chain metric output --- mlreco/models/layers/common/gnn_full_chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index a9aa8204..e80d5618 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -658,7 +658,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics else: res_seg = self.uresnet_loss({'segmentation':[out['segmentation'][0][deghost]]}, [seg_label[0][deghost]]) for key in res_seg: - res['uresnet_' + key] = res_seg[key] + res['segmentation_' + key] = res_seg[key] accuracy += res_seg['accuracy'] loss += self.segmentation_weight*res_seg['loss'] #print('uresnet ', self.segmentation_weight, res_seg['loss'], loss) From f9ffc378929a78c023c2a6d94ee423762428a274 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Mon, 12 Sep 2022 09:17:25 -0700 Subject: [PATCH 13/52] Constrain full chain fragment end points to fragment voxel set --- mlreco/utils/gnn/data.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index 65e21d88..acdeb9e2 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -163,7 +163,10 @@ def _get_extra_gnn_features(fragments, input, result, use_ppn=False, - use_supp=False): + use_supp=False, + enhance=False, + allow_outside=False, + coords_col=(1, 4)): """ Extracting extra features to feed into the GNN particle aggregators @@ -171,6 +174,14 @@ def _get_extra_gnn_features(fragments, end points for tracks (+ direction estimate) - Supplemental: Mean/RMS energy in 
the fragment + semantic class + If the `enhance` parameter is `True`, tracks leverage PPN predictions + to provide a more accurate estimate of the end points. This needs to be + avoided for track fragments, as PPN is not trained to find end points for them. + If set to `False`, the two voxels furthest away from each other are picked. + + If the `allow_outside` parameter is `True`, the end point estimates + are *not* brought back to the closest fragment voxel. + Parameters ========== fragments: np.ndarray @@ -180,6 +191,8 @@ def _get_extra_gnn_features(fragments, result: dictionary use_ppn: bool use_supp: bool + enhance: bool + allow_outside: bool Returns ======= @@ -205,9 +218,9 @@ def _get_extra_gnn_features(fragments, dtype=torch.double) points_tensor = result['points'][0].detach().double() for i, f in enumerate(fragments[mask]): + fragment_voxels = input[0][f][:,coords_col[0]:coords_col[1]] if frag_seg[mask][i] == 1: - end_points = get_track_endpoints_geo(input[0], f, points_tensor) - ppn_points = torch.cat((ppn_points, end_points.reshape(1,-1)), dim=0) + end_points = get_track_endpoints_geo(input[0], f, points_tensor if enhance else None) else: scores = torch.softmax(points_tensor[f, -2:], dim=1)[:,-1] # scores = torch.sigmoid(points_tensor[f, -1]) @@ -216,10 +229,15 @@ def _get_extra_gnn_features(fragments, as_tuple=True)[0] argmax = dmask[torch.argmax(scores[dmask])] \ if len(dmask) else torch.argmax(scores) - start = input[0][f][argmax,1:4] + \ - points_tensor[f][argmax,:3] + 0.5 - ppn_points = torch.cat((ppn_points, - torch.cat([start, start]).reshape(1,-1)), dim=0) + start = fragment_voxels[argmax] + points_tensor[f][argmax,:3] + 0.5 + end_points = torch.cat([start, start]) + + if not allow_outside: + dist_mat = torch.cdist(end_points.reshape(-1,3), fragment_voxels) + argmins = torch.argmin(dist_mat, dim=1) + end_points = torch.cat([fragment_voxels[argmins[0]], fragment_voxels[argmins[1]]]) + + ppn_points = torch.cat((ppn_points, end_points.reshape(1,-1)), dim=0) kwargs['points'] = ppn_points From 6871d05ca5a5919eb8dea25ceb3398a7255cf639 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Mon, 12 Sep 2022 13:39:16 -0700 Subject: [PATCH 14/52] Save time on track fragment end point heuristic --- mlreco/utils/gnn/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index acdeb9e2..cbb8c45a 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -232,7 +232,7 @@ def _get_extra_gnn_features(fragments, start = fragment_voxels[argmax] + points_tensor[f][argmax,:3] + 0.5 end_points = torch.cat([start, start]) - if not allow_outside: + if not allow_outside and (frag_seg[mask][i] != 1 or (frag_seg[mask][i] == 1 and enhance)): dist_mat = torch.cdist(end_points.reshape(-1,3), fragment_voxels) argmins = torch.argmin(dist_mat, dim=1) end_points = torch.cat([fragment_voxels[argmins[0]], fragment_voxels[argmins[1]]]) From aeae78a786c62f287b500350339e9f21b59b5261 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 13 Sep 2022 10:50:24 -0700 Subject: [PATCH 15/52] Fixed bug where unwrapper fails when use_anchor_points the GNN node kinematics loss is False --- mlreco/models/layers/gnn/losses/node_kinematics.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/mlreco/models/layers/gnn/losses/node_kinematics.py b/mlreco/models/layers/gnn/losses/node_kinematics.py index 23afe964..2c26e8d7 100644 --- a/mlreco/models/layers/gnn/losses/node_kinematics.py +++ 
b/mlreco/models/layers/gnn/losses/node_kinematics.py @@ -166,11 +166,9 @@ def forward(self, out, types): clusts = out['clusts'][i][j] # Increment the type loss, balance classes if requested - if compute_type: + if compute_type and out['node_pred_type'][i][j].shape: # Get the type predictions and true types from the specified columns node_pred_type = out['node_pred_type'][i][j] - if not node_pred_type.shape[0]: - continue node_assn_type = get_cluster_label(labels, clusts, column=self.type_col) # Do not apply loss to nodes labeled -1 (unknown class) @@ -207,11 +205,9 @@ def forward(self, out, types): n_clusts_type += len(valid_mask_type) # Increment the momentum loss - if compute_momentum: + if compute_momentum and out['node_pred_p'][i][j].shape: # Get the momentum predictions and true momenta from the specified columns node_pred_p = out['node_pred_p'][i][j] - if not node_pred_p.shape[0]: - continue node_assn_p = get_momenta_label(labels, clusts, column=self.momentum_col) # Do not apply loss to nodes labeled -1 (unknown class) @@ -236,12 +232,10 @@ def forward(self, out, types): # Increment the number of nodes n_clusts_momentum += len(clusts) - if compute_vtx: + if compute_vtx and out['node_pred_vtx'][i][j].shape: # Get the vertex predictions, node features and true vertices from the specified columns node_pred_vtx = out['node_pred_vtx'][i][j] input_node_features = out['input_node_features'][i][j] - if not node_pred_vtx.shape[0]: - continue node_assn_vtx = np.stack([get_cluster_label(labels, clusts, column=c) for c in range(self.vtx_col, self.vtx_col+3)], axis=1) node_assn_vtx_pos = get_cluster_label(labels, clusts, column=self.vtx_positives_col) @@ -330,13 +324,13 @@ def forward(self, out, types): }) if compute_vtx: result.update({ - 'vtx_anchors': vtx_anchors, 'vtx_labels': vtx_labels, 'vtx_score_loss': vtx_score_loss/n_clusts_vtx if n_clusts_vtx else 0., 'vtx_score_acc': vtx_score_acc/n_clusts_vtx if n_clusts_vtx else 1., 'vtx_position_loss': vtx_position_loss/n_clusts_vtx_pos if n_clusts_vtx_pos else 0., 'vtx_position_acc': vtx_position_acc/n_clusts_vtx_pos if n_clusts_vtx_pos else 1. }) + if self.use_anchor_points: result['vtx_anchors'] = vtx_anchors return result From 9fa6a942fe29ae731eda2e506a9b797fd58aa096 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 13 Sep 2022 13:12:31 -0700 Subject: [PATCH 16/52] Updated default list of results to be concatenated --- mlreco/main_funcs.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mlreco/main_funcs.py b/mlreco/main_funcs.py index 0f33ff85..822f65bc 100644 --- a/mlreco/main_funcs.py +++ b/mlreco/main_funcs.py @@ -12,6 +12,7 @@ # happens in the process_config function before anything # else is allowed to happen. 
+ class Handlers: cfg = None data_io = None @@ -45,18 +46,21 @@ def inference(cfg, event_list=None): def process_config(cfg, verbose=True): - # Set GPUS to be used if 'trainval' in cfg: + # Set GPUs to be used os.environ['CUDA_VISIBLE_DEVICES'] = cfg['trainval']['gpus'] cfg['trainval']['gpus'] = list(range(len([int(a) for a in cfg['trainval']['gpus'].split(',') if a.isdigit()]))) + # Update seed if cfg['trainval']['seed'] < 0: import time cfg['trainval']['seed'] = int(time.time()) else: cfg['trainval']['seed'] = int(cfg['trainval']['seed']) + # Set MinkowskiEngine number of threads os.environ['OMP_NUM_THREADS'] = '16' # default value + # Set default concat_result default_concat_result = ['input_edge_features', 'input_node_features','points', 'coordinates', 'particle_node_features', 'particle_edge_features', @@ -71,7 +75,7 @@ def process_config(cfg, verbose=True): 'particle_edge_pred', 'particle_group_pred', 'particles', 'inter_edge_index', 'inter_node_pred', 'inter_edge_pred', 'inter_group_pred', 'inter_particles', 'node_pred_p', 'node_pred_type', - 'vertex_labels', 'anchors', 'grappa_inter_vertex_labels', 'grappa_inter_anchors', + 'vtx_labels', 'vtx_anchors', 'grappa_inter_vtx_labels', 'grappa_inter_vtx_anchors', 'kinematics_node_pred_p', 'kinematics_node_pred_type', 'flow_edge_pred', 'kinematics_particles', 'kinematics_edge_index', 'clust_fragments', 'clust_frag_seg', 'interactions', 'inter_cosmic_pred', From 2e5bd1184fa59c251f72b7d7e14d55b9cf5403a7 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 15 Sep 2022 12:00:32 -0700 Subject: [PATCH 17/52] Added better handling of empty input in UResNet and PPN loss. Skip full chain GNN section if no voxels --- mlreco/models/layers/common/gnn_full_chain.py | 2 +- mlreco/models/layers/common/ppnplus.py | 14 +++++++------- mlreco/models/uresnet.py | 18 +++++++++--------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index e80d5618..6cda0c5e 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -582,7 +582,7 @@ def forward(self, input): """ result, input, revert_func = self.full_chain_cnn(input) - if self.process_fragments and (self.enable_gnn_track or self.enable_gnn_shower or self.enable_gnn_inter or self.enable_gnn_particle): + if len(input[0]) and self.process_fragments and (self.enable_gnn_track or self.enable_gnn_shower or self.enable_gnn_inter or self.enable_gnn_particle): result = self.full_chain_gnn(result, input) result = revert_func(result) diff --git a/mlreco/models/layers/common/ppnplus.py b/mlreco/models/layers/common/ppnplus.py index 90bc06fc..bcd689f9 100644 --- a/mlreco/models/layers/common/ppnplus.py +++ b/mlreco/models/layers/common/ppnplus.py @@ -566,19 +566,19 @@ def forward(self, result, segment_label, particles_label): # Distance Loss d2, _ = torch.min(distance_positives, dim=0) reg_loss = d2.mean() - res['reg_loss'] += float(reg_loss) / num_batches - res['type_loss'] += float(type_loss) / num_batches - res['mask_loss'] += float(mask_loss_final) / num_batches - total_loss += (reg_loss + type_loss + mask_loss_final) / num_batches + res['reg_loss'] += float(reg_loss) / num_batches if num_batches else float(reg_loss) + res['type_loss'] += float(type_loss) / num_batches if num_batches else float(type_loss) + res['mask_loss'] += float(mask_loss_final) / num_batches if num_batches else float(mask_loss_final) + total_loss += (reg_loss + type_loss + 
mask_loss_final) / num_batches if num_batches else reg_loss + type_loss + mask_loss_final if self._classify_endpoints: - total_loss += loss_classify_endpoints / num_batches + total_loss += loss_classify_endpoints / num_batches if num_batches else loss_classify_endpoints - loss_layer /= num_batches + loss_layer /= max(1, num_batches) loss_gpu += loss_layer loss_gpu /= len(ppn_layers) total_loss += loss_gpu - total_acc /= num_batches + total_acc = total_acc / num_batches if num_batches else 1. res['loss'] = total_loss res['accuracy'] = float(total_acc) return res diff --git a/mlreco/models/uresnet.py b/mlreco/models/uresnet.py index 337ecc43..036762ec 100644 --- a/mlreco/models/uresnet.py +++ b/mlreco/models/uresnet.py @@ -282,23 +282,23 @@ def forward(self, result, label, weights=None): if self._ghost: results = { - 'accuracy': uresnet_acc/count if count else 0., - 'loss': (self._alpha * uresnet_loss + self._beta * mask_loss)/count if count else (self._alpha * uresnet_loss + self._beta * mask_loss), - 'ghost_mask_acc': mask_acc / count if count else 0., - 'ghost_mask_loss': self._beta * mask_loss / count if count else self._beta * mask_los, + 'accuracy': uresnet_acc/count if count else 1., + 'loss': (self._alpha * uresnet_loss + self._beta * mask_loss)/count if count else self._alpha * uresnet_loss + self._beta * mask_loss, + 'ghost_mask_accuracy': mask_acc / count if count else 1., + 'ghost_mask_loss': self._beta * mask_loss / count if count else self._beta * mask_loss, + 'uresnet_accuracy': uresnet_acc / count if count else 1., 'uresnet_loss': self._alpha * uresnet_loss / count if count else self._alpha * uresnet_loss, - 'uresnet_acc': uresnet_acc / count if count else 0., - 'ghost2ghost': ghost2ghost / count if count else 0., - 'nonghost2nonghost': nonghost2nonghost / count if count else 0. + 'ghost2ghost': ghost2ghost / count if count else 1., + 'nonghost2nonghost': nonghost2nonghost / count if count else 1. } else: results = { - 'accuracy': uresnet_acc/count if count else 0., + 'accuracy': uresnet_acc/count if count else 1., 'loss': uresnet_loss/count if count else uresnet_loss } for c in range(self._num_classes): if count_class[c] > 0: results['accuracy_class_%d' % c] = uresnet_acc_class[c]/count_class[c] else: - results['accuracy_class_%d' % c] = -1. + results['accuracy_class_%d' % c] = 1. 
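# [Illustrative note added for context; not part of the original patch.]
# Convention adopted across these patches for empty inputs: accuracy-like metrics
# fall back to a perfect score of 1 (there is nothing left to get wrong), while
# loss terms fall back to a differentiable zero-valued tensor rather than a plain
# float, either the un-normalized loss itself or torch.tensor(0., requires_grad=True):
#     'accuracy': uresnet_acc / count if count else 1.,
#     'loss':     uresnet_loss / count if count else uresnet_loss,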
return results From 98a4186575d5117274aea2d0d80d1319aabbbca3 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 15 Sep 2022 14:59:28 -0700 Subject: [PATCH 18/52] Fixed PPN postprocessing for batch_size>1 (was broken by not unwrapping points output) --- mlreco/models/full_chain.py | 34 +++++-------------- mlreco/models/layers/common/gnn_full_chain.py | 12 +++++-- mlreco/utils/ppn.py | 24 ++++++------- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/mlreco/models/full_chain.py b/mlreco/models/full_chain.py index e1334d32..c01eacbc 100644 --- a/mlreco/models/full_chain.py +++ b/mlreco/models/full_chain.py @@ -210,14 +210,13 @@ def full_chain_cnn(self, input): # Rescale the charge column, store it charges = compute_rescaled_charge(input[0], deghost, last_index=last_index) - input[0][deghost, 4] = charges - result.update({'input_rescaled':[input[0][deghost,:5]]}) + full_n = len(input[0]) + input[0] = input[0][deghost] + input[0][:, 4] = charges + result.update({'input_rescaled':[input[0][:,:5]]}) + if self.enable_uresnet: - if self.enable_charge_rescaling and deghost.sum() > 0: - assert not self.uresnet_lonely.ghost - result.update(self.uresnet_lonely([input[0][deghost, :4+self.input_features]])) - else: - result.update(self.uresnet_lonely([input[0][:, :4+self.input_features]])) + result.update(self.uresnet_lonely([input[0][:, :4+self.input_features]])) if self.enable_ppn: ppn_input = {} @@ -232,27 +231,13 @@ def full_chain_cnn(self, input): ppn_input['decoderTensors'][0]) result.update(ppn_output) - if self.enable_charge_rescaling and deghost.sum() > 0: - # Reshape output tensors of UResNet and PPN to be of the original shape - for key in ['segmentation', 'points', 'classify_endpoints', 'mask_ppn', 'ppn_coords', 'ppn_layers']: - if key not in result: continue - res = result[key][0] if isinstance(result[key][0], torch.Tensor) else result[key][0][-1] - tensor = torch.zeros((input[0].shape[0], res.shape[1]), dtype=res.dtype, device=res.device) - tensor[deghost] = res - if isinstance(result[key][0], torch.Tensor): - result[key][0] = tensor - else: - result[key][0][-1] = tensor - if 'ppn_output_coordinates' in result: - result['ppn_output_coordinates'][0] = input[0][:,:4].type(result['ppn_output_coordinates'][0].dtype) - # The rest of the chain only needs 1 input feature if self.input_features > 1: input[0] = input[0][:, :-self.input_features+1] cnn_result = {} - if self.enable_ghost: + if self.enable_ghost and not self.enable_charge_rescaling: # Update input based on deghosting results # if self.cheat_ghost: @@ -315,8 +300,7 @@ def full_chain_cnn(self, input): if self._gspice_use_true_labels: semantic_labels = label_seg[0][:, -1] else: - semantic_labels = torch.argmax(cnn_result['segmentation'][0], - dim=1).flatten() + semantic_labels = torch.argmax(cnn_result['segmentation'][0], dim=1).flatten() if self.enable_cnn_clust: if label_clustering is None and self.training: @@ -390,7 +374,7 @@ def full_chain_cnn(self, input): # cnn_result['true_points'] = coords def return_to_original(result): - if self.enable_ghost: + if self.enable_ghost and not self.enable_charge_rescaling: result['segmentation'][0] = segmentation return result diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 6cda0c5e..7d3f3d74 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -650,12 +650,17 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics 
res['deghost_' + key] = res_deghost[key] accuracy += res_deghost['accuracy'] loss += self.deghost_weight*res_deghost['loss'] - deghost = (seg_label[0][:,-1] < 5) & (out['ghost'][0][:,0] > out['ghost'][0][:,1]) # Only non-ghost (both true and pred) can go in semseg eval + deghost = out['ghost'][0][:,0] > out['ghost'][0][:,1] if self.enable_uresnet: if not self.enable_charge_rescaling: res_seg = self.uresnet_loss(out, seg_label) else: + seg = out['segmentation'][0] + full_seg = torch.zeros((seg_label[0].shape[0], seg.shape[1]), dtype=seg.dtype, device=seg.device) + full_seg[deghost] = seg + out['segmentation'][0] = full_seg + deghost &= seg_label[0][:,-1] < 5 # Only apply loss to true non-ghosts res_seg = self.uresnet_loss({'segmentation':[out['segmentation'][0][deghost]]}, [seg_label[0][deghost]]) for key in res_seg: res['segmentation_' + key] = res_seg[key] @@ -672,7 +677,8 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics accuracy += res_ppn['accuracy'] loss += self.ppn_weight*res_ppn['loss'] - if self.enable_ghost and (self.enable_cnn_clust or \ + if self.enable_ghost and not self.enable_charge_rescaling and \ + (self.enable_cnn_clust or \ self.enable_gnn_track or \ self.enable_gnn_shower or \ self.enable_gnn_inter or \ @@ -726,7 +732,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics segmentation_pred = out['segmentation'][0] - if self.enable_ghost: + if self.enable_ghost and not self.enable_charge_rescaling: segmentation_pred = segmentation_pred[deghost] if self._gspice_use_true_labels: gs_seg_label = torch.cat([cluster_label[0][:, :4], segment_label[:, None]], dim=1) diff --git a/mlreco/utils/ppn.py b/mlreco/utils/ppn.py index b88021c6..c600de63 100644 --- a/mlreco/utils/ppn.py +++ b/mlreco/utils/ppn.py @@ -302,15 +302,15 @@ def uresnet_ppn_type_point_selector(data, out, score_threshold=0.5, type_score_t # If 'points' is specified in `concat_result`, # then it won't be unwrapped. if len(points) == len(ppn_coords[-1]): - #pass + pass # print(entry, np.unique(ppn_coords[-1][:, 0], return_counts=True)) - points = points[ppn_coords[-1][:, 0] == entry, :] + #points = points[ppn_coords[-1][:, 0] == entry, :] else: # in case it has been unwrapped (possible in no-ghost scenario) points = out['points'][entry] enable_classify_endpoints = 'classify_endpoints' in out if enable_classify_endpoints: - classify_endpoints = out['classify_endpoints'][0][ppn_coords[-1][:, 0] == entry, :]#[entry] + classify_endpoints = out['classify_endpoints'][0] mask_ppn = out['mask_ppn'][-1] # predicted type labels @@ -349,7 +349,7 @@ def uresnet_ppn_type_point_selector(data, out, score_threshold=0.5, type_score_t batch_index = batch_ids == b batch_index2 = ppn_coords[-1][:, 0] == b # print(batch_index.shape, batch_index2.shape, mask_ppn.shape, scores.shape) - mask = ((~(mask_ppn[batch_index2] == 0)).any(axis=1)) & (scores[batch_index][:, 1] > score_threshold) + mask = ((~(mask_ppn[batch_index2] == 0)).any(axis=1)) & (scores[batch_index2][:, 1] > score_threshold) # If we want to restrict the postprocessing to specific voxels # (e.g. within a particle cluster, not the full event) # then use the argument `selection`. 
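# [Illustrative note added for context; not part of the original patch.]
# Two distinct batch masks coexist in uresnet_ppn_type_point_selector:
#     batch_index  = batch_ids == b             (rows of the input event tensor)
#     batch_index2 = ppn_coords[-1][:, 0] == b  (rows of the PPN output, which is
#                                                no longer unwrapped per entry)
# The hunks above and below therefore index points, scores and classify_endpoints
# with batch_index2, and keep batch_index only for event_data and the UResNet
# predictions; mixing the two masks is what broke batch_size > 1.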
@@ -362,26 +362,26 @@ def uresnet_ppn_type_point_selector(data, out, score_threshold=0.5, type_score_t new_mask[indices] = mask[indices] mask = new_mask - ppn_type_predictions = np.argmax(scipy.special.softmax(points[batch_index][mask][:, type_col[0]:type_col[1]], axis=1), axis=1) - ppn_type_softmax = scipy.special.softmax(points[batch_index][mask][:, type_col[0]:type_col[1]], axis=1) + ppn_type_predictions = np.argmax(scipy.special.softmax(points[batch_index2][mask][:, type_col[0]:type_col[1]], axis=1), axis=1) + ppn_type_softmax = scipy.special.softmax(points[batch_index2][mask][:, type_col[0]:type_col[1]], axis=1) if enable_classify_endpoints: - ppn_classify_endpoints = scipy.special.softmax(classify_endpoints[batch_index][mask], axis=1) + ppn_classify_endpoints = scipy.special.softmax(classify_endpoints[batch_index2][mask], axis=1) if enforce_type: for c in range(num_classes): uresnet_points = uresnet_predictions[batch_index][mask] == c ppn_points = ppn_type_softmax[:, c] > type_score_threshold #ppn_type_predictions == c if np.count_nonzero(ppn_points) > 0 and np.count_nonzero(uresnet_points) > 0: - d = scipy.spatial.distance.cdist(points[batch_index][mask][ppn_points][:, :3] + event_data[batch_index][mask][ppn_points][:, coords_col[0]:coords_col[1]] + 0.5, event_data[batch_index][mask][uresnet_points][:, coords_col[0]:coords_col[1]]) + d = scipy.spatial.distance.cdist(points[batch_index2][mask][ppn_points][:, :3] + event_data[batch_index][mask][ppn_points][:, coords_col[0]:coords_col[1]] + 0.5, event_data[batch_index][mask][uresnet_points][:, coords_col[0]:coords_col[1]]) ppn_mask = (d < type_threshold).any(axis=1) - final_points.append(points[batch_index][mask][ppn_points][ppn_mask][:, :3] + 0.5 + event_data[batch_index][mask][ppn_points][ppn_mask][:, coords_col[0]:coords_col[1]]) - final_scores.append(scores[batch_index][mask][ppn_points][ppn_mask]) + final_points.append(points[batch_index2][mask][ppn_points][ppn_mask][:, :3] + 0.5 + event_data[batch_index][mask][ppn_points][ppn_mask][:, coords_col[0]:coords_col[1]]) + final_scores.append(scores[batch_index2][mask][ppn_points][ppn_mask]) final_types.append(ppn_type_predictions[ppn_points][ppn_mask]) final_softmax.append(ppn_type_softmax[ppn_points][ppn_mask]) if enable_classify_endpoints: final_endpoints.append(ppn_classify_endpoints[ppn_points][ppn_mask]) else: - final_points = [points[batch_index][mask][:, :3] + 0.5 + event_data[batch_index][mask][:, coords_col[0]:coords_col[1]]] - final_scores = [scores[batch_index][mask]] + final_points = [points[batch_index2][mask][:, :3] + 0.5 + event_data[batch_index][mask][:, coords_col[0]:coords_col[1]]] + final_scores = [scores[batch_index2][mask]] final_types = [ppn_type_predictions] final_softmax = [ppn_type_softmax] if enable_classify_endpoints: From 7626cf8bdb3e65e51912cced831b35d2a9f056aa Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Fri, 16 Sep 2022 11:35:44 -0700 Subject: [PATCH 19/52] Fix bug with deghosted tensor indexing and cluster labeling --- mlreco/models/full_chain.py | 21 ++++++++++++------- mlreco/models/layers/common/gnn_full_chain.py | 10 ++------- .../layers/gnn/losses/node_kinematics.py | 12 +++++------ 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/mlreco/models/full_chain.py b/mlreco/models/full_chain.py index c01eacbc..203373b4 100644 --- a/mlreco/models/full_chain.py +++ b/mlreco/models/full_chain.py @@ -210,13 +210,18 @@ def full_chain_cnn(self, input): # Rescale the charge column, store it charges = compute_rescaled_charge(input[0], 
deghost, last_index=last_index) - full_n = len(input[0]) - input[0] = input[0][deghost] - input[0][:, 4] = charges - result.update({'input_rescaled':[input[0][:,:5]]}) + input[0][deghost, 4] = charges + result.update({'input_rescaled':[input[0][deghost,:5]]}) if self.enable_uresnet: - result.update(self.uresnet_lonely([input[0][:, :4+self.input_features]])) + if not self.enable_charge_rescaling: + result.update(self.uresnet_lonely([input[0][:, :4+self.input_features]])) + else: + result.update(self.uresnet_lonely([input[0][deghost, :4+self.input_features]])) + seg = result['segmentation'][0] + full_seg = torch.zeros((input[0].shape[0], seg.shape[1]), dtype=seg.dtype, device=seg.device) + full_seg[deghost] = seg + result['segmentation'][0] = full_seg if self.enable_ppn: ppn_input = {} @@ -237,7 +242,7 @@ def full_chain_cnn(self, input): cnn_result = {} - if self.enable_ghost and not self.enable_charge_rescaling: + if self.enable_ghost: # Update input based on deghosting results # if self.cheat_ghost: @@ -268,7 +273,7 @@ def full_chain_cnn(self, input): deghost_result.update(result) deghost_result.pop('ghost') deghost_result['segmentation'][0] = result['segmentation'][0][deghost] - if self.enable_ppn: + if self.enable_ppn and not self.enable_charge_rescaling: deghost_result['points'] = [result['points'][0][deghost]] if 'classify_endpoints' in deghost_result: deghost_result['classify_endpoints'] = [result['classify_endpoints'][0][deghost]] @@ -374,7 +379,7 @@ def full_chain_cnn(self, input): # cnn_result['true_points'] = coords def return_to_original(result): - if self.enable_ghost and not self.enable_charge_rescaling: + if self.enable_ghost: result['segmentation'][0] = segmentation return result diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 7d3f3d74..26d4a08d 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -650,17 +650,12 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics res['deghost_' + key] = res_deghost[key] accuracy += res_deghost['accuracy'] loss += self.deghost_weight*res_deghost['loss'] - deghost = out['ghost'][0][:,0] > out['ghost'][0][:,1] + deghost = (out['ghost'][0][:,0] > out['ghost'][0][:,1]) & (seg_label[0][:,-1] < 5) # Only apply loss to reco/true non-ghosts if self.enable_uresnet: if not self.enable_charge_rescaling: res_seg = self.uresnet_loss(out, seg_label) else: - seg = out['segmentation'][0] - full_seg = torch.zeros((seg_label[0].shape[0], seg.shape[1]), dtype=seg.dtype, device=seg.device) - full_seg[deghost] = seg - out['segmentation'][0] = full_seg - deghost &= seg_label[0][:,-1] < 5 # Only apply loss to true non-ghosts res_seg = self.uresnet_loss({'segmentation':[out['segmentation'][0][deghost]]}, [seg_label[0][deghost]]) for key in res_seg: res['segmentation_' + key] = res_seg[key] @@ -677,8 +672,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics accuracy += res_ppn['accuracy'] loss += self.ppn_weight*res_ppn['loss'] - if self.enable_ghost and not self.enable_charge_rescaling and \ - (self.enable_cnn_clust or \ + if self.enable_ghost and (self.enable_cnn_clust or \ self.enable_gnn_track or \ self.enable_gnn_shower or \ self.enable_gnn_inter or \ diff --git a/mlreco/models/layers/gnn/losses/node_kinematics.py b/mlreco/models/layers/gnn/losses/node_kinematics.py index 2c26e8d7..a8f815f2 100644 --- a/mlreco/models/layers/gnn/losses/node_kinematics.py +++ 
b/mlreco/models/layers/gnn/losses/node_kinematics.py @@ -166,7 +166,7 @@ def forward(self, out, types): clusts = out['clusts'][i][j] # Increment the type loss, balance classes if requested - if compute_type and out['node_pred_type'][i][j].shape: + if compute_type and out['node_pred_type'][i][j].shape[0]: # Get the type predictions and true types from the specified columns node_pred_type = out['node_pred_type'][i][j] node_assn_type = get_cluster_label(labels, clusts, column=self.type_col) @@ -205,7 +205,7 @@ def forward(self, out, types): n_clusts_type += len(valid_mask_type) # Increment the momentum loss - if compute_momentum and out['node_pred_p'][i][j].shape: + if compute_momentum and out['node_pred_p'][i][j].shape[0]: # Get the momentum predictions and true momenta from the specified columns node_pred_p = out['node_pred_p'][i][j] node_assn_p = get_momenta_label(labels, clusts, column=self.momentum_col) @@ -232,7 +232,7 @@ def forward(self, out, types): # Increment the number of nodes n_clusts_momentum += len(clusts) - if compute_vtx and out['node_pred_vtx'][i][j].shape: + if compute_vtx and out['node_pred_vtx'][i][j].shape[0]: # Get the vertex predictions, node features and true vertices from the specified columns node_pred_vtx = out['node_pred_vtx'][i][j] input_node_features = out['input_node_features'][i][j] @@ -291,13 +291,13 @@ def forward(self, out, types): # Compute the accuracy of assignment (fraction of correctly assigned nodes) # and the accuracy of momentum estimation (RMS relative residual) - if compute_type and len(valid_mask_type): + if compute_type and out['node_pred_type'][i][j].shape[0] and len(valid_mask_type): type_acc += float(torch.sum(torch.argmax(node_pred_type, dim=1) == node_assn_type)) - if compute_momentum and len(valid_mask_p): + if compute_momentum and out['node_pred_p'][i][j].shape[0] and len(valid_mask_p): p_acc += float(torch.sum(1.- torch.abs(node_pred_p.squeeze()-node_assn_p)/node_assn_p)) # 1-MAPE - if compute_vtx and len(pos_mask_vtx): + if compute_vtx and out['node_pred_vtx'][i][j].shape[0] and len(pos_mask_vtx): vtx_position_acc += float(torch.sum(1. - torch.abs(vtx_pred - vtx_label)/(torch.abs(vtx_pred) + torch.abs(vtx_label))))/3. vtx_score_acc += float(torch.sum(torch.argmax(node_pred_vtx[:,3:], dim=1) == node_assn_vtx_pos)) From 836d1e6f523f4983730da61c26aa429fba979003 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Mon, 19 Sep 2022 15:30:51 -0700 Subject: [PATCH 20/52] _asis parsers now leave coordinates unchanged. 
New parse_particles and parse_neutrinos introduced which rescale the coordinates to voxel coordinates by default, and behave like _asis parsers if voxel_coordinates is set to False --- analysis/classes/ui.py | 2 +- config/chain/metrics.cfg | 4 +- docs/source/HowTo.rst | 14 ++-- mlreco/iotools/parsers/__init__.py | 10 +-- mlreco/iotools/parsers/cluster.py | 10 +-- mlreco/iotools/parsers/particles.py | 108 +++++++++++++++++----------- test/test_parser.py | 2 +- 7 files changed, 89 insertions(+), 61 deletions(-) diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index f2277c89..0536ff9a 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -752,7 +752,7 @@ class FullChainEvaluator(FullChainPredictor): - particle_corrected - cluster3d_pcluster particles_asis: - - parse_particle_asis + - parse_particles - particle_pcluster - cluster3d_pcluster diff --git a/config/chain/metrics.cfg b/config/chain/metrics.cfg index 3e53ff5e..6c109f58 100644 --- a/config/chain/metrics.cfg +++ b/config/chain/metrics.cfg @@ -46,8 +46,8 @@ iotool: - parse_particle_graph_corrected - particle_corrected - cluster3d_pcluster - particles_asis: - - parse_particle_asis + particles: + - parse_particles - particle_pcluster - cluster3d_pcluster meta: diff --git a/docs/source/HowTo.rst b/docs/source/HowTo.rst index 9df97d93..ce9c5ff1 100644 --- a/docs/source/HowTo.rst +++ b/docs/source/HowTo.rst @@ -48,24 +48,24 @@ data blob: How to get true particle information ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You need to use the parser ``particles_asis``. For example: +You need to use the parser ``parse_particles``. For example: .. code-block:: yaml iotool: dataset: schema: - particles_asis: - - parse_particle_asis + particles: + - parse_particles - particle_pcluster - cluster3d_pcluster -Then you will be able to access ``data['particles_asis'][entry]`` +Then you will be able to access ``data['particles'][entry]`` which is a list of objects of type ``larcv::Particle``. .. code-block:: python - for p in data['particles_asis'][entry]: + for p in data['particles'][entry]: mom = np.array([p.px(), p.py(), p.pz()]) print(p.id(), p.num_voxels(), mom/np.linalg.norm(mom)) @@ -107,7 +107,7 @@ How to get true neutrino information Assuming you are either using a Singularity container that has the right larcv2 compiled or you followed the note above explaining how to get it -by yourself, you can use the ``parse_neutrino_asis`` parser of ``lartpc_mlreco3d``. +by yourself, you can use the ``parse_neutrinos`` parser of ``lartpc_mlreco3d``. .. code-block:: yaml @@ -116,7 +116,7 @@ by yourself, you can use the ``parse_neutrino_asis`` parser of ``lartpc_mlreco3d dataset: schema: neutrinos: - - parse_neutrino_asis + - parse_neutrinos - neutrino_mpv - cluster3d_pcluster diff --git a/mlreco/iotools/parsers/__init__.py b/mlreco/iotools/parsers/__init__.py index a3ea33a1..a32f7fb8 100644 --- a/mlreco/iotools/parsers/__init__.py +++ b/mlreco/iotools/parsers/__init__.py @@ -24,8 +24,8 @@ .. csv-table:: Particle parsers :header: Parser name, Description - ``parse_particle_asis``, Retrieve array of larcv::Particle - ``parse_neutrino_asis``, Retrieve array of larcv::Neutrino + ``parse_particles``, Retrieve array of larcv::Particle + ``parse_neutrinos``, Retrieve array of larcv::Neutrino ``parse_particle_points``, Retrieve array of larcv::Particle ground truth points tensor ``parse_particle_coords``, Retrieve array of larcv::Particle coordinates (start and end) and start time ``parse_particle_graph``, Construct edges between particles (i.e. 
clusters) from larcv::EventParticle @@ -87,13 +87,15 @@ ) from mlreco.iotools.parsers.particles import ( - parse_particle_asis, - parse_neutrino_asis, + parse_particles, + parse_neutrinos, parse_particle_points, parse_particle_coords, parse_particle_graph, parse_particle_singlep_pdg, parse_particle_singlep_einit, + parse_particle_asis, # Deprecated + parse_neutrino_asis, # Deprecated parse_particle_points_with_tagging, # Deprecated parse_particle_graph_corrected # Deprecated ) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py index 5f852e82..2e954ba6 100644 --- a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -4,7 +4,7 @@ from mlreco.utils.groups import get_interaction_id, get_nu_id, get_particle_id, get_primary_id from mlreco.utils.groups import type_labels as TYPE_LABELS from mlreco.iotools.parsers.sparse import parse_sparse3d -from mlreco.iotools.parsers.particles import parse_particle_asis +from mlreco.iotools.parsers.particles import parse_particles from mlreco.iotools.parsers.clean_data import clean_sparse_data @@ -140,12 +140,12 @@ def parse_cluster3d(cluster_event, labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) labels['primary'] = get_primary_id(cluster_event, particles_v) if add_kinematics_info: - particles_asis_v = parse_particle_asis(particle_event, cluster_event) + particles_v = parse_particles(particle_event, cluster_event) labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) labels['p'] = np.array([(p.px()**2+p.py()**2+p.pz()**2)/1e3 for p in particles_v]) - labels['vtx_x'] = np.array([p.ancestor_position().x() for p in particles_asis_v]) - labels['vtx_y'] = np.array([p.ancestor_position().y() for p in particles_asis_v]) - labels['vtx_z'] = np.array([p.ancestor_position().z() for p in particles_asis_v]) + labels['vtx_x'] = np.array([p.ancestor_position().x() for p in particles_v]) + labels['vtx_y'] = np.array([p.ancestor_position().y() for p in particles_v]) + labels['vtx_z'] = np.array([p.ancestor_position().z() for p in particles_v]) labels['primary_group'] = np.array([p.group_id()==p.parent_id() for p in particles_v], dtype=np.float32) labels['sem'] = np.array([p.shape() for p in particles_v]) diff --git a/mlreco/iotools/parsers/particles.py b/mlreco/iotools/parsers/particles.py index b29a7203..48661ef9 100644 --- a/mlreco/iotools/parsers/particles.py +++ b/mlreco/iotools/parsers/particles.py @@ -4,89 +4,103 @@ from mlreco.utils.groups import type_labels as TYPE_LABELS -def parse_particle_asis(particle_event, cluster_event): +def parse_particles(particle_event, cluster_event, voxel_coordinates=True): """ - A function to copy construct & return an array of larcv::Particle + A function to copy construct & return an array of larcv::Particle. + + If `voxel_coordinates` is set to `True`, the parser rescales the truth + positions (start, end, etc.) to voxel coordinates. .. 
code-block:: yaml schema: - particle_asis: - parser: parse_particle_asis + particles: + parser: parse_particles args: particle_event: particle_pcluster cluster_event: cluster3d_pcluster + voxel_coordinates: True Configuration ------------- particle_event: larcv::EventParticle cluster_event: larcv::EventClusterVoxel3D to translate coordinates + voxel_coordinates: bool Returns ------- list - a python list of larcv::Particle object + a python list of larcv::Particle objects """ particles = [larcv.Particle(p) for p in particle_event.as_vector()] - meta = cluster_event.meta() - funcs = ['first_step', 'last_step', 'position', 'end_position', 'ancestor_position'] - for p in particles: - for f in funcs: - pos = getattr(p,f)() - x = (pos.x() - meta.min_x()) / meta.size_voxel_x() - y = (pos.y() - meta.min_y()) / meta.size_voxel_y() - z = (pos.z() - meta.min_z()) / meta.size_voxel_z() - # x = (pos.x() - meta.origin().x) / meta.size_voxel_x() - # y = (pos.y() - meta.origin().y) / meta.size_voxel_y() - # z = (pos.z() - meta.origin().z) / meta.size_voxel_z() - # x = pos.x() * meta.size_voxel_x() + meta.origin().x - # y = pos.y() * meta.size_voxel_y() + meta.origin().y - # z = pos.z() * meta.size_voxel_z() + meta.origin().z - getattr(p,f)(x,y,z,pos.t()) + if voxel_coordinates: + meta = cluster_event.meta() + funcs = ['first_step', 'last_step', 'position', 'end_position', 'ancestor_position'] + for p in particles: + for f in funcs: + pos = getattr(p,f)() + x = (pos.x() - meta.min_x()) / meta.size_voxel_x() + y = (pos.y() - meta.min_y()) / meta.size_voxel_y() + z = (pos.z() - meta.min_z()) / meta.size_voxel_z() + # x = (pos.x() - meta.origin().x) / meta.size_voxel_x() + # y = (pos.y() - meta.origin().y) / meta.size_voxel_y() + # z = (pos.z() - meta.origin().z) / meta.size_voxel_z() + # x = pos.x() * meta.size_voxel_x() + meta.origin().x + # y = pos.y() * meta.size_voxel_y() + meta.origin().y + # z = pos.z() * meta.size_voxel_z() + meta.origin().z + getattr(p,f)(x,y,z,pos.t()) + return particles -def parse_neutrino_asis(neutrino_event, cluster_event): +def parse_neutrinos(neutrino_event, cluster_event, voxel_coordinates=True): """ - A function to copy construct & return an array of larcv::Neutrino + A function to copy construct & return an array of larcv::Neutrino. + + If `voxel_coordinates` is set to `True`, the parser rescales the truth + position information to voxel coordinates. .. 
code-block:: yaml schema: - neutrino_asis: - parser: parse_neutrino_asis + neutrinos: + parser: parse_neutrinos args: neutrino_event: neutrino_mpv cluster_event: cluster3d_pcluster + voxel_coordinates: True Configuration ------------- neutrino_pcluster: larcv::EventNeutrino cluster3d_pcluster: larcv::EventClusterVoxel3D to translate coordinates + voxel_coordinates: bool Returns ------- list - a python list of larcv::Neutrino object + a python list of larcv::Neutrino objects """ neutrinos = [larcv.Neutrino(p) for p in neutrino_event.as_vector()] - meta = cluster_event.meta() - funcs = ['position'] - for p in neutrinos: - for f in funcs: - pos = getattr(p,f)() - x = (pos.x() - meta.min_x()) / meta.size_voxel_x() - y = (pos.y() - meta.min_y()) / meta.size_voxel_y() - z = (pos.z() - meta.min_z()) / meta.size_voxel_z() - # x = (pos.x() - meta.origin().x) / meta.size_voxel_x() - # y = (pos.y() - meta.origin().y) / meta.size_voxel_y() - # z = (pos.z() - meta.origin().z) / meta.size_voxel_z() - # x = pos.x() * meta.size_voxel_x() + meta.origin().x - # y = pos.y() * meta.size_voxel_y() + meta.origin().y - # z = pos.z() * meta.size_voxel_z() + meta.origin().z - getattr(p,f)(x,y,z,pos.t()) + if voxel_coordinates: + meta = cluster_event.meta() + funcs = ['position'] + for p in neutrinos: + for f in funcs: + pos = getattr(p,f)() + x = (pos.x() - meta.min_x()) / meta.size_voxel_x() + y = (pos.y() - meta.min_y()) / meta.size_voxel_y() + z = (pos.z() - meta.min_z()) / meta.size_voxel_z() + # x = (pos.x() - meta.origin().x) / meta.size_voxel_x() + # y = (pos.y() - meta.origin().y) / meta.size_voxel_y() + # z = (pos.z() - meta.origin().z) / meta.size_voxel_z() + # x = pos.x() * meta.size_voxel_x() + meta.origin().x + # y = pos.y() * meta.size_voxel_y() + meta.origin().y + # z = pos.z() * meta.size_voxel_z() + meta.origin().z + getattr(p,f)(x,y,z,pos.t()) + return neutrinos @@ -166,7 +180,7 @@ def parse_particle_coords(particle_event, cluster_event): last_step_x, last_step_y, last_step_z, first_step_t, shape_id] ''' # Scale particle coordinates to image size - particles = parse_particle_asis(particle_event, cluster_event) + particles = parse_particles(particle_event, cluster_event) # Make features particle_feats = [] @@ -322,6 +336,18 @@ def parse_particle_singlep_einit(particle_event): return -1 +def parse_particle_asis(particle_event, cluster_event): + from warnings import warn + warn("Deprecated: parse_particle_asis is deprecated, use parse_particles with voxel_coordinates set to False", DeprecationWarning) + return parse_particles(particle_event, cluster_event, voxel_coordinates=False) + + +def parse_neutrino_asis(neutrino_event, cluster_event): + from warnings import warn + warn("Deprecated: parse_neutrino_asis is deprecated, use parse_neutrinos with voxel_coordinates set to False", DeprecationWarning) + return parse_neutrinos(neutrino_event, cluster_event, voxel_coordinates=False) + + def parse_particle_points_with_tagging(sparse_event, particle_event): from warnings import warn warn("Deprecated: parse_particle_points_with_tagging deprecated, use parse_particle_points instead", DeprecationWarning) diff --git a/test/test_parser.py b/test/test_parser.py index a19a1929..c24bb75e 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -261,7 +261,7 @@ def test_parse_semantics(): def test_parse_weights(): pass -def test_parse_particle_asis(): +def test_parse_particles(): pass @pytest.mark.parametrize("event_cluster3d", [3], indirect=True) From 5928268d6653013cd58e7f28cab64b94476fd642 Mon Sep 17 
00:00:00 2001 From: Francois Drielsma Date: Fri, 23 Sep 2022 09:57:59 -0700 Subject: [PATCH 21/52] Added a vertex prediction heuristic if an interaction has a single primary --- mlreco/utils/vertex.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/mlreco/utils/vertex.py b/mlreco/utils/vertex.py index f1b2424f..42617041 100644 --- a/mlreco/utils/vertex.py +++ b/mlreco/utils/vertex.py @@ -308,9 +308,29 @@ def predict_vertex(inter_idx, data_idx, input_data, res, all_voxels = input_data[data_idx] if 'ghost' in res and apply_deghosting: - mask_ghost = np.argmax(res['ghost'][data_idx], axis=1) == 0 - all_voxels = input_data[data_idx][mask_ghost] - + if 'input_rescaled' not in res: + mask_ghost = np.argmax(res['ghost'][data_idx], axis=1) == 0 + all_voxels = input_data[data_idx][mask_ghost] + else: + all_voxels = res['input_rescaled'][data_idx] + + # Handle the case where only a single primary is available + if len(ppn_candidates) == 1: + particle_seg = res['particles_seg'][data_idx][inter_mask][c_indices[0]] + end_points = res['particle_node_features'][data_idx][inter_mask][primary_particles][c_indices[0], -9:-3].reshape(-1,3) + if particle_seg != 1: + # If there's a single shower object, pick the shower start point + return end_points[0] + else: + # If there's a single track, pick the end point with the lowest local charge density + voxels = all_voxels[c_candidates[0], coords_col[0]:coords_col[1]] + dist_mat = scipy.spatial.distance.cdist(end_points, voxels) + mask = dist_mat < 5 + charges = all_voxels[c_candidates[0],4] + locald = [np.sum(charges[mask[0]]), np.sum(charges[mask[1]])] + return end_points[np.argmin(locald)] + + # Handle all other cases ppn_candidates2 = [] directions = [] distances_others, distances_primaries = [], [] From 5e33fa9c5c415e57efc7f7271c6b934abcd1575f Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 28 Sep 2022 16:44:32 -0700 Subject: [PATCH 22/52] Fixed inter_cluster_distance function, added distance method option --- mlreco/utils/gnn/network.py | 43 +++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/mlreco/utils/gnn/network.py b/mlreco/utils/gnn/network.py index 51b5c934..c202a8b8 100644 --- a/mlreco/utils/gnn/network.py +++ b/mlreco/utils/gnn/network.py @@ -6,7 +6,7 @@ from scipy.spatial import Delaunay from scipy.sparse.csgraph import minimum_spanning_tree -from mlreco.utils.numba import numba_wrapper, submatrix_nb, cdist_nb +from mlreco.utils.numba import numba_wrapper, submatrix_nb, cdist_nb, mean_nb @nb.njit(cache=True) @@ -421,7 +421,7 @@ def _get_edge_distances(voxels: nb.float32[:,:], @numba_wrapper(cast_args=['voxels'], list_args=['clusts']) -def inter_cluster_distance(voxels, clsuts, batch_ids): +def inter_cluster_distance(voxels, clusts, batch_ids=None, mode='voxel'): """ Finds the inter-cluster distance between every pair of clusters within each batch, returned as a block-diagonal matrix. 
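For intuition, here is a plain NumPy/SciPy sketch of the two distance conventions offered by this function (`voxel` closest-approach vs `centroid`); it is illustrative only, not the numba-compiled implementation added in this patch:

```python
import numpy as np
from scipy.spatial.distance import cdist

def toy_inter_cluster_distance(voxels, clusts, mode='voxel'):
    # Illustration only: single batch, plain O(C^2) Python loop
    n = len(clusts)
    dist = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            if mode == 'voxel':
                # Distance of closest approach between the two voxel sets
                d = cdist(voxels[clusts[i]], voxels[clusts[j]]).min()
            else:
                # Euclidean distance between the two cluster centroids
                d = np.linalg.norm(voxels[clusts[i]].mean(0) - voxels[clusts[j]].mean(0))
            dist[i, j] = dist[j, i] = d
    return dist

voxels = np.random.rand(12, 3)
clusts = [np.arange(0, 5), np.arange(5, 8), np.arange(8, 12)]
print(toy_inter_cluster_distance(voxels, clusts, mode='centroid'))
```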
@@ -430,24 +430,45 @@ def inter_cluster_distance(voxels, clsuts, batch_ids): voxels (torch.tensor) : (N,3) Tensor of voxel coordinates clusts ([np.ndarray]) : (C) List of arrays of voxel IDs in each cluster batch_ids (np.ndarray): (C) List of cluster batch IDs + mode (str) : Eiher use closest voxel distance (`voxel`) or centroid distance (`centroid`) Returns: torch.tensor: (C,C) Tensor of pair-wise cluster distances """ + # If there is no batch_ids provided, assign 0 to all clusters + if batch_ids is None: + batch_ids = np.zeros(len(clusts), dtype=np.int64) + return _inter_cluster_distance(voxels, clusts, batch_ids, mode) -@nb.njit(parallel=True, cache=True) +@nb.njit(parallel=True) def _inter_cluster_distance(voxels: nb.float64[:,:], clusts: nb.types.List(nb.int64[:]), - batch_ids: nb.int64[:]) -> nb.float64[:,:]: + batch_ids: nb.int64[:], + mode: str = 'voxel') -> nb.float64[:,:]: + assert len(clusts) == len(batch_ids) dist_mat = np.zeros((len(batch_ids), len(batch_ids)), dtype=voxels.dtype) - for i in nb.prange(len(batch_ids)): - for j in range(len(batch_ids)): - if batch_ids[i] == batch_ids[j]: - if i < j: - dist_mat[i,j] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) - elif i > j: - dist_mat[i,j] = dist_mat[j,i] + if mode == 'voxel': + for i in nb.prange(len(batch_ids)): + for j in range(len(batch_ids)): + if batch_ids[i] == batch_ids[j]: + if i < j: + dist_mat[i,j] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) + elif i > j: + dist_mat[i,j] = dist_mat[j,i] + elif mode == 'centroid': + centroids = np.empty((len(batch_ids), voxels.shape[1]), dtype=voxels.dtype) + for i in nb.prange(len(batch_ids)): + centroids[i] = mean_nb(voxels[clusts[i]], axis=0) + for i in nb.prange(len(batch_ids)): + for j in range(len(batch_ids)): + if batch_ids[i] == batch_ids[j]: + if i < j: + dist_mat[i,j] = np.sqrt(np.sum((centroids[j]-centroids[i])**2)) + else: + dist_mat[i,j] = dist_mat[j,i] + else: + raise ValueError('Inter-cluster distance mode not supported') return dist_mat From 1a9f1f51d0ece3e0de67069025b0a8c5d26956f7 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 28 Sep 2022 23:19:55 -0700 Subject: [PATCH 23/52] Bug fix in vertex heuristic for n_primary=1 case --- mlreco/utils/vertex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/utils/vertex.py b/mlreco/utils/vertex.py index 42617041..6c6527b9 100644 --- a/mlreco/utils/vertex.py +++ b/mlreco/utils/vertex.py @@ -316,7 +316,7 @@ def predict_vertex(inter_idx, data_idx, input_data, res, # Handle the case where only a single primary is available if len(ppn_candidates) == 1: - particle_seg = res['particles_seg'][data_idx][inter_mask][c_indices[0]] + particle_seg = res['particles_seg'][data_idx][inter_mask][primary_particles][c_indices[0]] end_points = res['particle_node_features'][data_idx][inter_mask][primary_particles][c_indices[0], -9:-3].reshape(-1,3) if particle_seg != 1: # If there's a single shower object, pick the shower start point From 3336fa86444983f1a39cdda2f1cf4166054fa0ae Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 19 Oct 2022 17:12:51 -0700 Subject: [PATCH 24/52] Bug fix in network topology visualization tool cluster label casting --- mlreco/visualization/gnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/visualization/gnn.py b/mlreco/visualization/gnn.py index 938db03f..fd4e2d80 100644 --- a/mlreco/visualization/gnn.py +++ b/mlreco/visualization/gnn.py @@ -57,7 +57,7 @@ def network_topology(voxels, clusters, edge_index=[], 
clust_labels=[], edge_labe # Define the node features (label, color) n = len(clusters) if not len(clust_labels): clust_labels = np.ones(n) - if len(clust_labels) and isinstance(clust_labels[0], float): + if len(clust_labels) and not float(clust_labels[0]).is_integer(): node_labels = ['Instance ID: %d
<br>Group ID: %0.3f<br>Centroid: (%0.1f, %0.1f, %0.1f)' % (i, clust_labels[i], pos[i,0], pos[i,1], pos[i,2]) for i in range(n)] else: node_labels = ['Instance ID: %d<br>Group ID: %d<br>
Centroid: (%0.1f, %0.1f, %0.1f)' % (i, clust_labels[i], pos[i,0], pos[i,1], pos[i,2]) for i in range(n)] From 62f7d6564445ac2c60d713540ec87cccd7a667e7 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 19 Oct 2022 20:27:04 -0700 Subject: [PATCH 25/52] Fix inter_cluster_distance function (was parallelized but not thread safe) --- mlreco/utils/gnn/network.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/mlreco/utils/gnn/network.py b/mlreco/utils/gnn/network.py index c202a8b8..0f50b2f8 100644 --- a/mlreco/utils/gnn/network.py +++ b/mlreco/utils/gnn/network.py @@ -440,7 +440,7 @@ def inter_cluster_distance(voxels, clusts, batch_ids=None, mode='voxel'): return _inter_cluster_distance(voxels, clusts, batch_ids, mode) -@nb.njit(parallel=True) +@nb.njit(parallel=True, cache=True) def _inter_cluster_distance(voxels: nb.float64[:,:], clusts: nb.types.List(nb.int64[:]), batch_ids: nb.int64[:], @@ -448,25 +448,20 @@ def _inter_cluster_distance(voxels: nb.float64[:,:], assert len(clusts) == len(batch_ids) dist_mat = np.zeros((len(batch_ids), len(batch_ids)), dtype=voxels.dtype) + indxi, indxj = np.triu_indices(len(batch_ids), 1) if mode == 'voxel': - for i in nb.prange(len(batch_ids)): - for j in range(len(batch_ids)): - if batch_ids[i] == batch_ids[j]: - if i < j: - dist_mat[i,j] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) - elif i > j: - dist_mat[i,j] = dist_mat[j,i] + for k in nb.prange(len(indxi)): + i, j = indxi[k], indxj[k] + if batch_ids[i] == batch_ids[j]: + dist_mat[i,j] = dist_mat[j,i] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) elif mode == 'centroid': centroids = np.empty((len(batch_ids), voxels.shape[1]), dtype=voxels.dtype) for i in nb.prange(len(batch_ids)): centroids[i] = mean_nb(voxels[clusts[i]], axis=0) - for i in nb.prange(len(batch_ids)): - for j in range(len(batch_ids)): - if batch_ids[i] == batch_ids[j]: - if i < j: - dist_mat[i,j] = np.sqrt(np.sum((centroids[j]-centroids[i])**2)) - else: - dist_mat[i,j] = dist_mat[j,i] + for k in nb.prange(len(indxi)): + i, j = indxi[k], indxj[k] + if batch_ids[i] == batch_ids[j]: + dist_mat[i,j] = dist_mat[j,i] = np.sqrt(np.sum((centroids[j]-centroids[i])**2)) else: raise ValueError('Inter-cluster distance mode not supported') From 0ce6ea85224454ad3bd5709f8d7f796338c7233b Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 20 Oct 2022 19:25:11 -0700 Subject: [PATCH 26/52] Small fix to Francois' changes --- mlreco/models/layers/common/gnn_full_chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 26d4a08d..bf26c796 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -726,7 +726,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics segmentation_pred = out['segmentation'][0] - if self.enable_ghost and not self.enable_charge_rescaling: + if self.enable_ghost: #and not self.enable_charge_rescaling: segmentation_pred = segmentation_pred[deghost] if self._gspice_use_true_labels: gs_seg_label = torch.cat([cluster_label[0][:, :4], segment_label[:, None]], dim=1) From b2c21882189dce743b2675717cb9f6f84fdd9321 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 25 Oct 2022 10:15:15 -0700 Subject: [PATCH 27/52] Added routines useful to draw a training curve and its corresponding validation points --- mlreco/visualization/training.py | 301 
+++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 mlreco/visualization/training.py diff --git a/mlreco/visualization/training.py b/mlreco/visualization/training.py new file mode 100644 index 00000000..0cecd28e --- /dev/null +++ b/mlreco/visualization/training.py @@ -0,0 +1,301 @@ +import glob +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib as mpl +from matplotlib import pyplot as plt +from plotly.offline import iplot +from plotly import graph_objs as go +from plotly import subplots as psubplots +from plotly import colors as pcolors + +def set_size(width, fraction=1): + """ + Returns optimal figure dimension for a latex + plot, depending on the requested width. + + Args: + width (int) : Width of the figure + fraction (float): Fraction of the width + """ + # Width of figure (in pts) + fig_width_pt = width * fraction + + # Convert from pt to inches + inches_per_pt = 1 / 72.27 + + # Golden ratio to set aesthetic figure height + # https://disq.us/p/2940ij3 + golden_ratio = (5**.5 - 1) / 2 + + # Figure width in inches + fig_width_in = fig_width_pt * inches_per_pt + + # Figure height in inches + fig_height_in = fig_width_in * golden_ratio + + return (fig_width_in, fig_height_in) + + +def apply_latex_style(): + """ + Sets the necessary matplotlib and seaborn parameters + to draw a plot using latex. + """ + sns.set(rc={'figure.figsize':set_size(250), + 'text.usetex':True, + 'font.family': 'serif', + 'axes.labelsize': 8, + 'font.size': 8, + 'legend.fontsize': 8, + 'legend.labelspacing': 0.25, + 'legend.columnspacing': 0.25, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8,}, context='paper') + sns.set_style('white') + sns.set_style(rc={'axes.grid':True, 'font.family': 'serif'}) + mpl.rcParams['text.latex.preamble'] = [r"\usepackage{amsmath,bm}"] + + +def find_key(df, key_list, separator=':'): + """ + Checks if a DataFrame contains any of the keys listed + in a character-separated string. + + Args: + df (pandas.DataFrame): Pandas dataframe (or dictionary) containing data + key_lit (str) : Character-separated list of keys + separator (str) : Separation character between keys + Returns: + str: Key found + str: Name of the first key (for legend purposes) + """ + key_list = key_list.split(separator) + key_name = key_list[0] + key_found = np.array([k in df.keys() for k in key_list]) + if not np.any(key_found): + raise KeyError('Could not find the keys provided:', key_list) + key = key_list[np.where(key_found)[0][0]] + return key, key_name + + +def get_training_df(log_dir, prefix='train'): + """ + Finds all training log files inside the specified directory + and concatenates them. If the range of iterations overlap, keep only + that from the file started further in the training. + + Assumes that the formatting of the log file names is of the form + `prefix-x.csv`, with `x` the number of iterations. 
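As a concrete toy example of the log-file naming and overlap handling described above (file names and values are invented; assumes the `mlreco.visualization.training` module added by this patch):

```python
import tempfile
import pandas as pd
from mlreco.visualization.training import get_training_df

# Fake two training runs, the second restarted from iteration 3
tmp = tempfile.mkdtemp()
pd.DataFrame({'iter': range(0, 6), 'loss': [5.0, 4.0, 3.0, 2.9, 2.8, 2.7]}).to_csv(f'{tmp}/train-0.csv', index=False)
pd.DataFrame({'iter': range(3, 8), 'loss': [2.5, 2.0, 1.8, 1.6, 1.5]}).to_csv(f'{tmp}/train-3.csv', index=False)

log = get_training_df(tmp, prefix='train')
# Iterations 3-5 of train-0.csv are dropped in favour of the restarted file
print(log['iter'].tolist())   # expected: [0, 1, 2, 3, 4, 5, 6, 7]
```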
+ + Args: + log_dir (str): Path to the directory that contains the training log files + prefix (str) : Prefix shared between training file names (default: `train`) + Returns: + pandas.DataFrame: Combined training log data + """ + log_files = np.array(glob.glob(f'{log_dir}/{prefix}*')) + end_points = np.array([int(f.split('-')[-1].split('.csv')[0]) for f in log_files]) + order = np.argsort(end_points) + end_points = np.append(end_points[order], 1e12) + return pd.concat([pd.read_csv(f, nrows=end_points[i+1]-end_points[i]) for i, f in enumerate(log_files[order])], sort=True) + + +def get_validation_df(log_dir, keys, prefix='inference'): + """ + Finds all validation log files inside the specified directory + and build a single dataframe out of them. It returns the mean and + std of the requested keys for each file. + + Assumes that the formatting of the log file names is of the form + `prefix-x.csv`, with `x` the number of iterations. + + The key list allows for `:`-separated names, in case separate files + use different names for the same quantity. + + Args: + log_dir (str): Path to the directory that contains the validation log files + keys (list) : List of quantities to get mean/std for + prefix (str) : Prefix shared between validation file names (default: `inference`) + Returns: + pandas.DataFrame: Combined validation log data + """ + # Initialize a dictionary + val_data = {'iter':[]} + for key in keys: + key_name = key.split(':')[0] + val_data[key_name+'_mean'] = [] + val_data[key_name+'_err'] = [] + + # Loop over validation log files + log_files = np.array(glob.glob(f'{log_dir}/{prefix}*')) + for log_file in log_files: + df = pd.read_csv(log_file) + it = int(log_file.split('/')[-1].split('-')[-1].split('.')[0]) + val_data['iter'].append(it) + for key_list in keys: + key, key_name = find_key(df, key_list) + val_data[f'{key_name}_mean'].append(df[key].mean()) + val_data[f'{key_name}_err'].append(df[key].std()/np.sqrt(len(df[key]))) + + args = np.argsort(val_data['iter']) + for key, val in val_data.items(): + val_data[key] = np.array(val)[args] + + return pd.DataFrame(val_data) + + +def draw_training_curves(log_dir, models, metrics, + limits={}, model_names={}, metric_names={}, + max_iter=-1, step=1, smoothing=1, print_min=False, print_max=False, + plotly=True, same_plot=True, paper=False, leg_ncols=1, + figure_name='', train_prefix='train', val_prefix='inference'): + """ + Finds all training and validation log files inside the specified + directory and draws an evolution plot of the request quantities. 
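A hypothetical call (directory layout, model folder and metric names are all invented) might look like:

```python
from mlreco.visualization.training import draw_training_curves

# Assumes logs/uresnet/ contains train-*.csv and inference-*.csv log files
draw_training_curves('logs/', ['uresnet'], ['accuracy', 'loss'],
                     model_names={'uresnet': 'UResNet baseline'},
                     metric_names={'accuracy': 'Accuracy', 'loss': 'Loss'},
                     smoothing=50, step=10, same_plot=False)
```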
+ + Args: + log_dir (str) : Path to the directory that contains the folder with log files + models (list) : List of model (folder) names under the main directory + metrics (list) : List of quantities to draw + limits (list/dict) : List of y boundaries for the plot (or dictionary of y boundaries, one per metric) + model_names (dict) : Dictionary which maps raw model names to model labels (default: empty dict) + metric_names (dict): Dictionary which maps raw metric names to metric labels (default: empty dict) + max_iter (int) : Maximum number of interation to include in the plot (default: -1) + step (int) : Step between two successive iterations that are represented (default: 1) + smoothing (int) : Number of iteration over which to average the metric value (default: 1) + plotly (bool) : Use plotly to draw (interactive) + same_plot (bool) : Draw all model/metric pairs on a single plot (default: True) + paper (bool) : Format plot for paper (use latex) + leg_ncols (int) : Number of columns in the legend (default: 3) + figure_name (str) : Name of the figure. If specified, figure is saved (default: '') + train_prefix (str) : Prefix shared between training file names (default: `train`) + val_prefix (str) : Prefix shared between validation file names (default: `inference`) + """ + # Set the style + plotly_colors = pcolors.convert_colors_to_same_type(pcolors.DEFAULT_PLOTLY_COLORS, 'tuple')[0] + if not plotly: + cr_char = '\n' + if paper: + apply_latex_style() + linewidth = 0.5 + markersize = 1 + else: + sns.set(rc={'figure.figsize':(16,9)}, context='notebook', font_scale=2) + sns.set_style('white') + sns.set_style(rc={'axes.grid':True}) + linewidth = 2 + markersize = 10 + else: + graphs = [] + cr_char = '
' + converter = lambda color: 'rgba({}, {}, {}, 0.5)'.format(*color) + plotly_colors = pcolors.color_parser(plotly_colors, converter) + layout = go.Layout(template='plotly_white', width=1000, height=500, margin=dict(r=20, l=20, b=20, t=20), + xaxis=dict(title=dict(text='Epochs', font=dict(size=20)), tickfont=dict(size=20), linecolor='black', mirror=True), + yaxis=dict(title=dict(text='Metric', font=dict(size=20)), tickfont=dict(size=20), linecolor='black', mirror=True), + legend=dict(font=dict(size=20))) + if len(models) == 1 and same_plot: + layout['legend']['title'] = model_names[models[0]] if models[0] in model_names else models[0] + + + # If there is >1 subplot, prepare the canvas + if not same_plot: + if not plotly: + fig, axes = plt.subplots(len(metrics), sharex=True) + fig.subplots_adjust(hspace=0) + for axis in axes: + axis.set_facecolor('white') + else: + fig = psubplots.make_subplots(rows=len(metrics), shared_xaxes=True, vertical_spacing=0) + for i in range(len(metrics)): + if i > 0: + layout[f'xaxis{i+1}'] = layout['xaxis'] + layout[f'yaxis{i+1}'] = layout['yaxis'] + layout[f'xaxis{i+1}']['title']['text'] = '' if i < len(metrics)-1 else 'Epochs' + layout[f'yaxis{i+1}']['title']['text'] = metric_names[metrics[i]] if metrics[i] in metric_names else metrics[i] + if metrics[i] in limits and len(limits[metrics[i]]) == 2: + layout[f'yaxis{i+1}']['range'] = limits[metrics[i]] + + fig.update_layout(layout) + + elif plotly: + if isinstance(limits, list) and len(limits) == 2: + layout['yaxis']['range'] = limits + fig = go.Figure(layout=layout) + + # Get the DataFrames for the requested models/metrics + dfs, val_dfs, colors = {}, {}, {} + for i, key in enumerate(models): + log_subdir = log_dir+key + dfs[key] = get_training_df(log_subdir, train_prefix) + val_dfs[key] = get_validation_df(log_subdir, metrics, val_prefix) + colors[key] = plotly_colors[i] + + # Draw the requested metrics + for i, metric_list in enumerate(metrics): + # Draw the training curves + metric, metric_name = find_key(dfs[key], metric_list) + for j, key in enumerate(dfs.keys()): + # Get the necessary data + epoch_train = dfs[key]['epoch'][:max_iter:step] + metric_train = dfs[key][metric][:max_iter:step] if smoothing == 1 else dfs[key][metric][:max_iter].rolling(smoothing, min_periods=1, center=True).mean()[::step] + draw_val = bool(len(val_dfs[key]['iter'])) + if draw_val: + mask_val = val_dfs[key]['iter'] < max_iter if max_iter > -1 else val_dfs[key]['iter'] < 1e12 + iter_val = val_dfs[key]['iter'][mask_val] + epoch_val = [float(dfs[key]['epoch'][dfs[key]['iter'] == it]) for it in iter_val] + metricm_val = val_dfs[key][metric_name+'_mean'][mask_val] + metrice_val = val_dfs[key][metric_name+'_err'][mask_val] + + # Pick a label for this specific model/metric pair + if not same_plot: + label = model_names[key] if key in model_names else key + else: + if len(models) == 1: + label = metric_names[metric_name] if metric_name in metric_names else metric_name + else: + label = f'{metric_names[metric_name] if metric_name in metric_names else metric_name} ({model_names[key] if key in model_names else key})' + if print_min and draw_val: + label += f'{cr_char}Min: {iter_val[np.argmin(metricm_val)]:d}' + if print_max and draw_val: + label += f'{cr_char}Max: {iter_val[np.argmax(metricm_val)]:d}' + + # Prepare the relevant plots + color = colors[key] if not same_plot else plotly_colors[i*len(models)+j] + if not plotly: + axis = plt if same_plot else axes[i] + axis.plot(epoch_train, metric_train, label=label, color=color, alpha=0.5, 
linewidth=linewidth) + if draw_val: + axis.errorbar(epoch_val, metricm_val, yerr=metrice_val, fmt='.', color=color, linewidth=linewidth, markersize=markersize) + else: + graphs += [go.Scatter(x=epoch_train, y=metric_train, name=label, line=dict(color=color), showlegend=(same_plot | (not same_plot and not i)))] + if draw_val: + hovertext = [f'(Iteration: {iter_val[i]:d})' for i in range(len(iter_val))] + # hovertext = [f'(Iteration: {iter_val[i]:d}, Epoch: {epoch_val[i]:0.3f}, Metric: {metricm_val[i]:0.3f})' for i in range(len(iter_val))] + graphs += [go.Scatter(x=epoch_val, y=metricm_val, error_y_array=metrice_val, mode='markers', hovertext=hovertext, marker=dict(color=color), showlegend=False)] + + if not plotly: + if not same_plot: + for i, metric in enumerate(metrics): + metric_name = metric.split(':')[0] + axes[i].set_xlabel('Epochs') + axes[i].set_ylabel(metric_names[metric_name] if metric_name in metric_names else metric_name) + if metric_name in limits and len(limits[metric_name]) == 2: + axes[i].set_ylim(limits[metric_name]) + axes[0].legend(ncol=leg_ncols) + else: + plt.xlabel('Epochs') + plt.ylabel('Metric') + plt.gca().set_ylim(limits) + legend_title = model_names[models[0]] if models[0] in model_names else models[0] + plt.legend(ncol=leg_ncols, title=legend_title if len(models)==1 else None) + if len(figure_name): + plt.savefig(f'{figure_name}.pdf', bbox_inches='tight') + plt.show() + else: + if not same_plot: + fig.add_traces(graphs, rows=list(np.arange(len(metrics), step=1./(2*len(models))).astype(int)+1), cols=list(np.ones(2*len(models)*len(metrics), dtype=int))) + else: + fig.add_traces(graphs) + iplot(fig) From cac961c06172eac62891b7f7ed74adb47b5a6876 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 25 Oct 2022 10:52:42 -0700 Subject: [PATCH 28/52] Tweaks to the training curve drawer --- mlreco/visualization/training.py | 41 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/mlreco/visualization/training.py b/mlreco/visualization/training.py index 0cecd28e..879f5674 100644 --- a/mlreco/visualization/training.py +++ b/mlreco/visualization/training.py @@ -148,7 +148,7 @@ def get_validation_df(log_dir, keys, prefix='inference'): def draw_training_curves(log_dir, models, metrics, limits={}, model_names={}, metric_names={}, max_iter=-1, step=1, smoothing=1, print_min=False, print_max=False, - plotly=True, same_plot=True, paper=False, leg_ncols=1, + interactive=True, same_plot=True, paper=False, leg_ncols=1, figure_name='', train_prefix='train', val_prefix='inference'): """ Finds all training and validation log files inside the specified @@ -159,22 +159,22 @@ def draw_training_curves(log_dir, models, metrics, models (list) : List of model (folder) names under the main directory metrics (list) : List of quantities to draw limits (list/dict) : List of y boundaries for the plot (or dictionary of y boundaries, one per metric) - model_names (dict) : Dictionary which maps raw model names to model labels (default: empty dict) - metric_names (dict): Dictionary which maps raw metric names to metric labels (default: empty dict) - max_iter (int) : Maximum number of interation to include in the plot (default: -1) - step (int) : Step between two successive iterations that are represented (default: 1) - smoothing (int) : Number of iteration over which to average the metric value (default: 1) - plotly (bool) : Use plotly to draw (interactive) - same_plot (bool) : Draw all model/metric pairs on a single plot (default: True) - paper (bool) 
: Format plot for paper (use latex) - leg_ncols (int) : Number of columns in the legend (default: 3) - figure_name (str) : Name of the figure. If specified, figure is saved (default: '') + model_names (dict) : Dictionary which maps raw model names to model labels (default: `{}`) + metric_names (dict): Dictionary which maps raw metric names to metric labels (default: `{}`) + max_iter (int) : Maximum number of interation to include in the plot (default: `-1`) + step (int) : Step between two successive iterations that are represented (default: `1`) + smoothing (int) : Number of iteration over which to average the metric value (default: `1`) + interactive (bool) : Use plotly to draw (default: `True`) + same_plot (bool) : Draw all model/metric pairs on a single plot (default: `True`) + paper (bool) : Format plot for paper, using latex (default: `False`) + leg_ncols (int) : Number of columns in the legend (default: `1`) + figure_name (str) : Name of the figure. If specified, figure is saved (default: `''`) train_prefix (str) : Prefix shared between training file names (default: `train`) val_prefix (str) : Prefix shared between validation file names (default: `inference`) """ # Set the style plotly_colors = pcolors.convert_colors_to_same_type(pcolors.DEFAULT_PLOTLY_COLORS, 'tuple')[0] - if not plotly: + if not interactive: cr_char = '\n' if paper: apply_latex_style() @@ -201,7 +201,7 @@ def draw_training_curves(log_dir, models, metrics, # If there is >1 subplot, prepare the canvas if not same_plot: - if not plotly: + if not interactive: fig, axes = plt.subplots(len(metrics), sharex=True) fig.subplots_adjust(hspace=0) for axis in axes: @@ -218,10 +218,11 @@ def draw_training_curves(log_dir, models, metrics, layout[f'yaxis{i+1}']['range'] = limits[metrics[i]] fig.update_layout(layout) - - elif plotly: + elif interactive: if isinstance(limits, list) and len(limits) == 2: layout['yaxis']['range'] = limits + if len(metrics) == 1: + layout['yaxis']['title']['text'] = metric_names[metrics[0]] if metrics[0] in metric_names else metrics[0] fig = go.Figure(layout=layout) # Get the DataFrames for the requested models/metrics @@ -254,6 +255,8 @@ def draw_training_curves(log_dir, models, metrics, else: if len(models) == 1: label = metric_names[metric_name] if metric_name in metric_names else metric_name + elif len(metrics) == 1: + label = model_names[key] if key in model_names else key else: label = f'{metric_names[metric_name] if metric_name in metric_names else metric_name} ({model_names[key] if key in model_names else key})' if print_min and draw_val: @@ -263,7 +266,7 @@ def draw_training_curves(log_dir, models, metrics, # Prepare the relevant plots color = colors[key] if not same_plot else plotly_colors[i*len(models)+j] - if not plotly: + if not interactive: axis = plt if same_plot else axes[i] axis.plot(epoch_train, metric_train, label=label, color=color, alpha=0.5, linewidth=linewidth) if draw_val: @@ -275,7 +278,7 @@ def draw_training_curves(log_dir, models, metrics, # hovertext = [f'(Iteration: {iter_val[i]:d}, Epoch: {epoch_val[i]:0.3f}, Metric: {metricm_val[i]:0.3f})' for i in range(len(iter_val))] graphs += [go.Scatter(x=epoch_val, y=metricm_val, error_y_array=metrice_val, mode='markers', hovertext=hovertext, marker=dict(color=color), showlegend=False)] - if not plotly: + if not interactive: if not same_plot: for i, metric in enumerate(metrics): metric_name = metric.split(':')[0] @@ -286,7 +289,9 @@ def draw_training_curves(log_dir, models, metrics, axes[0].legend(ncol=leg_ncols) else: 
plt.xlabel('Epochs') - plt.ylabel('Metric') + ylabel = metric_names[metrics[0]] if metrics[0] in metric_names else metrics[0] + print(ylabel) + plt.ylabel(ylabel if len(metrics) == 1 else 'Metric') plt.gca().set_ylim(limits) legend_title = model_names[models[0]] if models[0] in model_names else models[0] plt.legend(ncol=leg_ncols, title=legend_title if len(models)==1 else None) From 349834478bcaf5d53346bc86beb1888289ce7eac Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 27 Oct 2022 12:13:10 -0700 Subject: [PATCH 29/52] Add volume boundary split functionality in I/O tools --- mlreco/iotools/collates.py | 136 +++++++++++++++++- mlreco/iotools/factories.py | 10 +- mlreco/iotools/parsers/sparse.py | 57 +++++--- mlreco/models/full_chain.py | 3 - .../layers/gnn/losses/node_kinematics.py | 1 + mlreco/utils/deghosting.py | 3 - 6 files changed, 186 insertions(+), 24 deletions(-) diff --git a/mlreco/iotools/collates.py b/mlreco/iotools/collates.py index f9f5de54..9809411a 100644 --- a/mlreco/iotools/collates.py +++ b/mlreco/iotools/collates.py @@ -7,7 +7,124 @@ import numpy as np -def CollateSparse(batch): +class VolumeBoundaries: + """ + VolumeBoundaries is a helper class to deal with multiple detector volumes. Assume you have N + volumes that you want to process independently, but your input data file does not separate + between them (maybe it is hard to make the separation at simulation level, e.g. in Supera). + You can specify in the configuration of the collate function where the volume boundaries are + and this helper class will take care of the following: + + 1. Relabel batch ids: this will introduce "virtual" batch ids to account for each volume in + each batch. + + 2. Shift coordinates: voxel coordinates are shifted such that the origin is always the bottom + left corner of a volume. In other words, it ensures the voxel coordinate phase space is the + same regardless of which volume we are processing. That way you can train on a single volume + (subpart of the detector, e.g. cryostat or TPC) and process later however many volumes make up + your detector. + + 3. Sort coordinates: there is no guarantee that concatenating coordinates of N volumes vs the + stored coordinates for label tensors which cover all volumes already by default will yield the + same ordering. Hence we do a np.lexsort on coordinates after 1. and 2. have happened. We sort + by: batch id, z, y, x in this order. + + An example of configuration would be : + + ```yaml + collate: + collate_fn: Collatesparse + boundaries: [[1376.3], None, None] + ``` + + `boundaries` is what defines the different volumes. It has a length equal to the spatial dimension. + For each spatial dimension, `None` means that there is no boundary along that axis. + A list of floating numbers specifies the volume boundaries along that axis in voxel units. + The list of volumes will be inferred from this list of boundaries ("meshgrid" style, taking + all possible combinations of the boundaries to generate all the volumes). + """ + def __init__(self, definitions): + """ + See explanation of `boundaries` above. 
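A toy sketch of the splitting behaviour described above (boundary value and coordinates are invented; assumes the VolumeBoundaries class added to mlreco/iotools/collates.py by this patch):

```python
import numpy as np
from mlreco.iotools.collates import VolumeBoundaries

# Two volumes separated along x at 1376.3 voxels; 'None' (as a string) means no split along that axis
vb = VolumeBoundaries([[1376.3], 'None', 'None'])

# Rows are (batch_id, x, y, z); the second point lies in the second volume
voxels = np.array([[0.,  100., 50., 50.],
                   [0., 1400., 50., 50.]])
new_voxels, perm = vb.split(voxels)
# new_voxels[:, 0] now holds virtual batch ids (0 and 1 here), and the x coordinate
# of the second point is shifted so both volumes share the same voxel origin
print(new_voxels[perm])
```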
+ + Parameters + ========== + definitions: list + """ + self.dim = len(definitions) + self.boundaries = definitions + + # Quick sanity check + for i in range(self.dim): + assert self.boundaries[i] == 'None' or (isinstance(self.boundaries[i], list) and len(self.boundaries[i]) > 0) + if self.boundaries[i] == 'None': + self.boundaries[i] = None + continue + self.boundaries[i].sort() # Ascending order + + + def split(self, voxels): + """ + Parameters + ========== + voxels: np.array, shape (N, 4) + It should contain (batch id, x, y, z) coordinates in this order (as an example if you are working in 3D). + + Returns + ======= + new_voxels: np.array, shape (N, 4) + The array contains voxels with shifted coordinates + virtual batch ids. This array is not yet permuted + to obey the lexsort. + perm: np.array, shape (N,) + This is a permutation mask which can be used to apply the lexsort to both the new voxels and the features + or data tensor (which is not passed to this function). + """ + coords = voxels[:, 1:] + assert len(coords.shape) == 2 + assert self.dim == coords.shape[1] + + all_boundaries, shifts = [], [] + n_boundaries =[] + for n in range(self.dim): + if self.boundaries[n] is None: + all_boundaries.append([np.ones((coords.shape[0],), dtype=bool)]) + shifts.append([0.]) + n_boundaries.append(0) + continue + dim_boundaries = [] + dim_shifts = [] + for i in range(len(self.boundaries[n])): + dim_boundaries.append( coords[:, n] < self.boundaries[n][i] ) + dim_shifts.append(self.boundaries[n][i-1] if i > 0 else 0.) + dim_boundaries.append( coords[:, n] >= self.boundaries[n][-1] ) + dim_shifts.append(self.boundaries[n][-1]) + all_boundaries.append(dim_boundaries) + shifts.append(dim_shifts) + n_boundaries.append(len(self.boundaries[n])) + + #n_volumes = np.prod([len(x) for x in all_boundaries]) + # Generate indices + all_index = [] + for n in range(self.dim): + all_index.append(np.arange(n_boundaries[n]+1)) + combo = np.array(np.meshgrid(*tuple(all_index))).T.reshape(-1, self.dim) + + virtual_batch_ids = np.zeros((coords.shape[0],), dtype=np.int32) + new_coords = coords.copy() + for idx, c in enumerate(combo): + m = all_boundaries[0][c[0]] + for n in range(1, self.dim): + m = np.logical_and(m, all_boundaries[n][c[n]]) + virtual_batch_ids[m] = idx + for n in range(self.dim): + new_coords[m, n] -= int(shifts[n][c[n]]) + + new_voxels = np.concatenate([virtual_batch_ids[:, None], new_coords], axis=1) + perm = np.lexsort(new_voxels.T[list(range(1, self.dim+1)) + [0], :]) + return new_voxels, perm + + +def CollateSparse(batch, **kwargs): ''' Collate sparse input. @@ -15,6 +132,9 @@ def CollateSparse(batch): ---------- batch : a list of dictionary Each list element (single dictionary) is a minibatch data = key-value pairs where a value is a parser function return. + boundaries: list, optional, default is None + This contains a list of volume boundaries if you want to process distinct volumes independently. See VolumeBoundaries + documentation for more details and explanations. Returns ------- @@ -29,6 +149,10 @@ def CollateSparse(batch): - The dictionaries in the input batch tuple are assumed to have identical list of keys. 
''' import MinkowskiEngine as ME + + split_boundaries = 'boundaries' in kwargs + vb = VolumeBoundaries(kwargs['boundaries']) if split_boundaries else None + result = {} concat = np.concatenate for key in batch[0].keys(): @@ -54,7 +178,11 @@ def CollateSparse(batch): coords_minibatch.append(batched_coords) #coords = torch.Tensor(concat(coords_minibatch, axis=0)) + dim = coords[0].shape[1] coords = concat(coords_minibatch, axis=0) + if split_boundaries: + coords[:, :dim+1], perm = vb.split(coords[:, :dim+1]) + coords = coords[perm] result[key] = coords else: @@ -89,6 +217,12 @@ def CollateSparse(batch): axis=1 ) for batch_id, sample in enumerate(batch) ], axis = 0) data = concat([sample[key][1] for sample in batch], axis=0) + + if split_boundaries: + voxels, perm = vb.split(voxels) + voxels = voxels[perm] + data = data[perm] + result[key] = concat([voxels, data], axis=1) elif isinstance(batch[0][key],np.ndarray) and \ len(batch[0][key].shape) == 1: diff --git a/mlreco/iotools/factories.py b/mlreco/iotools/factories.py index 9963736f..217f0632 100644 --- a/mlreco/iotools/factories.py +++ b/mlreco/iotools/factories.py @@ -44,6 +44,13 @@ def loader_factory(cfg,event_list=None): shuffle = True if not 'shuffle' in params else bool(params['shuffle' ]) num_workers = 1 if not 'num_workers' in params else int (params['num_workers']) collate_fn = None if not 'collate_fn' in params else str (params['collate_fn' ]) + collate_kwargs = {} + + if collate_fn is None: + collate_params = params.get('collate', {}) + collate_fn = None if not 'collate_fn' in collate_params else str(collate_params['collate_fn']) + collate_params.pop('collate_fn', None) + collate_kwargs = collate_params if not int(params['batch_size']) % int(params['minibatch_size']) == 0: print('iotools.batch_size (',params['batch_size'],'must be divisble by iotools.minibatch_size',params['minibatch_size']) @@ -51,6 +58,7 @@ def loader_factory(cfg,event_list=None): import mlreco.iotools.collates import mlreco.iotools.samplers + from functools import partial ds = dataset_factory(cfg,event_list) sampler = None @@ -59,7 +67,7 @@ def loader_factory(cfg,event_list=None): sam_cfg['minibatch_size']=cfg['iotool']['minibatch_size'] sampler = getattr(mlreco.iotools.samplers,sam_cfg['name']).create(ds,sam_cfg) if collate_fn is not None: - collate_fn = getattr(mlreco.iotools.collates,collate_fn) + collate_fn = partial(getattr(mlreco.iotools.collates,collate_fn), **collate_kwargs) loader = DataLoader(ds, batch_size = minibatch_size, shuffle = shuffle, diff --git a/mlreco/iotools/parsers/sparse.py b/mlreco/iotools/parsers/sparse.py index 01a18f9d..fc728159 100644 --- a/mlreco/iotools/parsers/sparse.py +++ b/mlreco/iotools/parsers/sparse.py @@ -57,7 +57,7 @@ def parse_sparse2d(sparse_event_list): return np_voxels, np.concatenate(output, axis=-1) -def parse_sparse3d(sparse_event_list): +def parse_sparse3d(sparse_event_list, features=None): """ A function to retrieve sparse tensor input from larcv::EventSparseTensor3D object @@ -78,6 +78,15 @@ def parse_sparse3d(sparse_event_list): ------------- sparse_event_list: list of larcv::EventSparseTensor3D Can be repeated to load more features (one per feature). + features: int, optional + Default is None (ignored). If a positive integer is specified, + the sparse_event_list will be split in equal lists of length + `features`. Each list will be concatenated along the feature + dimension separately. Then all lists are concatenated along the + first dimension (voxels). 
For example, this lets you work with + distinct detector volumes whose input data is stored in separate + TTrees.`features` is required to be a divider of the `sparse_event_list` + length. Returns ------- @@ -86,21 +95,37 @@ def parse_sparse3d(sparse_event_list): data: numpy array(float32) with shape (N,C) Pixel values/channels, as many channels as specified larcv::EventSparseTensor3D. """ - meta = None - output = [] - np_voxels = None - for sparse_event in sparse_event_list: - num_point = sparse_event.as_vector().size() - if meta is None: - meta = sparse_event.meta() - np_voxels = np.empty(shape=(num_point, 3), dtype=np.int32) - larcv.fill_3d_voxels(sparse_event, np_voxels) - else: - assert meta == sparse_event.meta() - np_data = np.empty(shape=(num_point, 1), dtype=np.float32) - larcv.fill_3d_pcloud(sparse_event, np_data) - output.append(np_data) - return np_voxels, np.concatenate(output, axis=-1) + split_sparse_event_list = [sparse_event_list] + if features is not None and features > 0: + if len(sparse_event_list) % features > 0: + raise Exception("features number in parse_sparse3d should be a divider of the sparse_event_list length.") + split_sparse_event_list = np.split(np.array(sparse_event_list), len(sparse_event_list) / features) + + voxels, features = [], [] + features_count = None + for sparse_event_list in split_sparse_event_list: + if features_count is None: + features_count = len(sparse_event_list) + assert len(sparse_event_list) == features_count + + meta = None + output = [] + np_voxels = None + for sparse_event in sparse_event_list: + num_point = sparse_event.as_vector().size() + if meta is None: + meta = sparse_event.meta() + np_voxels = np.empty(shape=(num_point, 3), dtype=np.int32) + larcv.fill_3d_voxels(sparse_event, np_voxels) + else: + assert meta == sparse_event.meta() + np_data = np.empty(shape=(num_point, 1), dtype=np.float32) + larcv.fill_3d_pcloud(sparse_event, np_data) + output.append(np_data) + voxels.append(np_voxels) + features.append(np.concatenate(output, axis=-1)) + + return np.concatenate(voxels, axis=0), np.concatenate(features, axis=0) def parse_sparse3d_ghost(sparse_event_semantics): diff --git a/mlreco/models/full_chain.py b/mlreco/models/full_chain.py index 203373b4..06814fe4 100644 --- a/mlreco/models/full_chain.py +++ b/mlreco/models/full_chain.py @@ -257,9 +257,6 @@ def full_chain_cnn(self, input): input = [input[0][deghost]] if label_seg is not None and label_clustering is not None: - - #print(label_seg[0].shape, label_clustering[0].shape) - # ME uses 0 for batch column, so need to compensate label_clustering = adapt_labels(result, label_seg, diff --git a/mlreco/models/layers/gnn/losses/node_kinematics.py b/mlreco/models/layers/gnn/losses/node_kinematics.py index a8f815f2..96170cb9 100644 --- a/mlreco/models/layers/gnn/losses/node_kinematics.py +++ b/mlreco/models/layers/gnn/losses/node_kinematics.py @@ -175,6 +175,7 @@ def forward(self, out, types): valid_mask_type = node_assn_type > -1 # Do not apply loss if the logit corresponding to the true class is -inf (forbidden prediction) + # Not a problem is node_assn_type is -1, as these rows will already be excluded by previous mask valid_mask_type &= (node_pred_type[np.arange(len(node_assn_type)),node_assn_type] != -float('inf')).detach().cpu().numpy() # If high purity is requested, do not include broken particle in the loss diff --git a/mlreco/utils/deghosting.py b/mlreco/utils/deghosting.py index 5540151b..8333d239 100644 --- a/mlreco/utils/deghosting.py +++ b/mlreco/utils/deghosting.py @@ -124,9 
+124,6 @@ def adapt_labels_knn(result, label_seg, label_clustering, assert true_mask.shape[0] == label_seg[0].shape[0] c3 = max(c2, batch_column+1) - indices = "2762 2763 2767 2769 4821 4822 4831 4832 4833 4834 4835 4844 4857 6617 12095 12096 12097".split() - indices = np.array([int(i) for i in indices]) - for i in range(len(label_seg)): coords = label_seg[i][:, :c3] label_c = [] From 8eb58694f9505c4dd3ec70dc112aa1f75202c0a1 Mon Sep 17 00:00:00 2001 From: Temigo Date: Fri, 28 Oct 2022 15:17:07 -0700 Subject: [PATCH 30/52] Changes to analysis tools (and small fixes) for volume splitting --- analysis/classes/Interaction.py | 99 ++ analysis/classes/Particle.py | 98 ++ analysis/classes/ParticleFragment.py | 63 + analysis/classes/TruthInteraction.py | 31 + analysis/classes/TruthParticle.py | 70 + analysis/classes/TruthParticleFragment.py | 24 + analysis/classes/__init__.py | 6 + analysis/classes/particle.py | 343 +---- analysis/classes/ui.py | 1153 ++++++++++------- mlreco/iotools/collates.py | 99 +- mlreco/models/layers/common/gnn_full_chain.py | 2 + mlreco/utils/cluster/fragmenter.py | 1 + mlreco/utils/gnn/data.py | 1 + 13 files changed, 1182 insertions(+), 808 deletions(-) create mode 100644 analysis/classes/Interaction.py create mode 100644 analysis/classes/Particle.py create mode 100644 analysis/classes/ParticleFragment.py create mode 100644 analysis/classes/TruthInteraction.py create mode 100644 analysis/classes/TruthParticle.py create mode 100644 analysis/classes/TruthParticleFragment.py diff --git a/analysis/classes/Interaction.py b/analysis/classes/Interaction.py new file mode 100644 index 00000000..a22a8f77 --- /dev/null +++ b/analysis/classes/Interaction.py @@ -0,0 +1,99 @@ +import numpy as np +import pandas as pd + +from typing import Counter, List, Union +from collections import defaultdict, Counter +from . import Particle + + +class Interaction: + """ + Data structure for managing interaction-level + full chain output information. + + Attributes + ---------- + id: int + Unique ID (Interaction ID) of this interaction. + particles: List[Particle] + List of objects that belong to this Interaction. + vertex: (1,3) np.array (Optional) + 3D coordinates of the predicted interaction vertex + nu_id: int (Optional, TODO) + Label indicating whether this interaction is a neutrino interaction + WARNING: The nu_id label is most likely unreliable. Don't use this + in reconstruction (used for debugging) + num_particles: int + total number of particles in this interaction. 
+ """ + def __init__(self, interaction_id, particles, vertex=None, nu_id=-1, volume=0): + self.id = interaction_id + self.particles = particles + self.match = [] + self._match_counts = {} + self.check_validity() + # Voxel indices of an interaction is defined by the union of + # constituent particle voxel indices + self.voxel_indices = [] + for p in self.particles: + self.voxel_indices.append(p.voxel_indices) + assert p.interaction_id == interaction_id + self.voxel_indices = np.hstack(self.voxel_indices) + self.size = self.voxel_indices.shape[0] + self.num_particles = len(self.particles) + + self.pid_keys = { + 0: 'Photon', + 1: 'Electron', + 2: 'Muon', + 3: 'Pion', + 4: 'Proton' + } + + self.get_particles_summary() + + self.vertex = vertex + if self.vertex is None: + self.vertex = np.array([-1, -1, -1]) + + self.nu_id = nu_id + self.volume = volume + + self.particle_ids = [p.id for p in self.particles] + self.particle_counts = Counter({ self.pid_keys[i] : 0 for i in range(len(self.pid_keys))}) + self.particle_counts.update([self.pid_keys[p.pid] for p in self.particles]) + + self.primary_particle_counts = Counter({ self.pid_keys[i] : 0 for i in range(len(self.pid_keys))}) + self.primary_particle_counts.update([self.pid_keys[p.pid] for p in self.particles if p.is_primary]) + + if sum(self.primary_particle_counts.values()) == 0: + # print("Interaction {} has no primary particles!".format(self.id)) + self.is_valid = False + else: + self.is_valid = True + + def check_validity(self): + for p in self.particles: + assert isinstance(p, Particle) + + def get_particles_summary(self): + self.particles_summary = "" + self.particles = sorted(self.particles, key=lambda x: x.id) + for p in self.particles: + pmsg = " - Particle {}: PID = {}, Size = {}, Match = {} \n".format( + p.id, self.pid_keys[p.pid], p.points.shape[0], str(p.match)) + self.particles_summary += pmsg + + + def __repr__(self): + + self.get_particles_summary() + msg = "Interaction {}, Valid: {}, Vertex: x={:.2f}, y={:.2f}, z={:.2f}\n"\ + "--------------------------------------------------------------------\n".format( + self.id, self.is_valid, self.vertex[0], self.vertex[1], self.vertex[2]) + return msg + self.particles_summary + + def __str__(self): + return "Interaction(id={}, vertex={}, nu_id={}, Particles={})".format( + self.id, str(self.vertex), self.nu_id, str(self.particle_ids)) + diff --git a/analysis/classes/Particle.py b/analysis/classes/Particle.py new file mode 100644 index 00000000..252e0107 --- /dev/null +++ b/analysis/classes/Particle.py @@ -0,0 +1,98 @@ +import numpy as np +import pandas as pd + +from typing import Counter, List, Union + + +class Particle: + ''' + Data Structure for managing Particle-level + full chain output information + + Attributes + ---------- + id: int + Unique ID of the particle + points: (N, 3) np.array + 3D coordinates of the voxels that belong to this particle + size: int + Total number of voxels that belong to this particle + depositions: (N, 1) np.array + Array of energy deposition values for each voxel (rescaled, ADC units) + voxel_indices: (N, ) np.array + Numeric integer indices of voxel positions of this particle + with respect to the total array of point in a single image. + semantic_type: int + Semantic type (shower fragment (0), track (1), + michel (2), delta (3), lowE (4)) of this particle. + pid: int + PDG Type (Photon (0), Electron (1), Muon (2), + Charged Pion (3), Proton (4)) of this particle. 
+ pid_conf: float + Softmax probability score for the most likely pid prediction + interaction_id: int + Integer ID of the particle's parent interaction + image_id: int + ID of the image in which this particle resides in + is_primary: bool + Indicator whether this particle is a primary from an interaction. + match: List[int] + List of TruthParticle IDs for which this particle is matched to + + startpoint: (1,3) np.array + (1, 3) array of particle's startpoint, if it could be assigned + endpoint: (1,3) np.array + (1, 3) array of particle's endpoint, if it could be assigned + ''' + def __init__(self, coords, group_id, semantic_type, interaction_id, + pid, image_id=0, voxel_indices=None, depositions=None, volume=0, **kwargs): + self.id = group_id + self.points = coords + self.size = coords.shape[0] + self.depositions = depositions # In rescaled ADC + self.voxel_indices = voxel_indices + self.semantic_type = semantic_type + self.pid = pid + self.pid_conf = kwargs.get('pid_conf', None) + self.interaction_id = interaction_id + self.image_id = image_id + self.is_primary = kwargs.get('is_primary', False) + self.match = [] + self._match_counts = {} +# self.fragments = fragment_ids + self.semantic_keys = { + 0: 'Shower Fragment', + 1: 'Track', + 2: 'Michel Electron', + 3: 'Delta Ray', + 4: 'LowE Depo' + } + + self.pid_keys = { + -1: 'None', + 0: 'Photon', + 1: 'Electron', + 2: 'Muon', + 3: 'Pion', + 4: 'Proton' + } + + self.sum_edep = np.sum(self.depositions) + self.volume = volume + + def __str__(self): + return self.__repr__() + + def __repr__(self): + fmt = "Particle( Image ID={:<3} | Particle ID={:<3} | Semantic_type: {:<15}"\ + " | PID: {:<8} | Primary: {:<2} | Score = {:.2f}% | Interaction ID: {:<2} | Size: {:<5} | Volume: {:<2} )" + msg = fmt.format(self.image_id, self.id, + self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", + self.pid_keys[self.pid] if self.pid in self.pid_keys else "None", + self.is_primary, + self.pid_conf * 100, + self.interaction_id, + self.points.shape[0], + self.volume) + return msg + diff --git a/analysis/classes/ParticleFragment.py b/analysis/classes/ParticleFragment.py new file mode 100644 index 00000000..07cf40d7 --- /dev/null +++ b/analysis/classes/ParticleFragment.py @@ -0,0 +1,63 @@ +import numpy as np +import pandas as pd + +from typing import Counter, List, Union +from . import Particle + + +class ParticleFragment(Particle): + ''' + Data structure for managing fragment-level + full chain output information + + Attributes + ---------- + See documentation for shared attributes. + Below are attributes exclusive to ParticleFragment + + id: int + fragment ID of this particle fragment (different from particle id) + group_id: int + Group ID (alias for Particle ID) for which this fragment belongs to. + is_primary: bool + If True, then this particle fragment corresponds to + a primary ionization trajectory within the group of fragments that + compose a particle. 
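A fragment keeps its own fragment ID alongside the ID of the particle (group) it belongs to. A minimal sketch with toy values, using the constructor defined just below:

import numpy as np
from analysis.classes import ParticleFragment

frag = ParticleFragment(np.random.rand(5, 3) * 768, fragment_id=3, semantic_type=0,
                        interaction_id=0, group_id=1, voxel_indices=np.arange(5),
                        depositions=np.ones(5), is_primary=True)
print(frag.id, frag.group_id)  # 3 1 -> fragment 3 belongs to particle (group) 1
print(frag)                    # one-line summary from __repr__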
+ ''' + def __init__(self, coords, fragment_id, semantic_type, interaction_id, + group_id, image_id=0, voxel_indices=None, + depositions=None, volume=0, **kwargs): + self.id = fragment_id + self.points = coords + self.size = coords.shape[0] + self.depositions = depositions # In rescaled ADC + self.voxel_indices = voxel_indices + self.semantic_type = semantic_type + self.group_id = group_id + self.interaction_id = interaction_id + self.image_id = image_id + self.is_primary = kwargs.get('is_primary', False) + self.semantic_keys = { + 0: 'Shower Fragment', + 1: 'Track', + 2: 'Michel Electron', + 3: 'Delta Ray', + 4: 'LowE Depo' + } + self.volume = volume + + def __str__(self): + return self.__repr__() + + def __repr__(self): + fmt = "ParticleFragment( Image ID={:<3} | Fragment ID={:<3} | Semantic_type: {:<15}"\ + " | Group ID: {:<3} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} | Volume: {:<2})" + msg = fmt.format(self.image_id, self.id, + self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", + self.group_id, + self.is_primary, + self.interaction_id, + self.points.shape[0], + self.volume) + return msg + diff --git a/analysis/classes/TruthInteraction.py b/analysis/classes/TruthInteraction.py new file mode 100644 index 00000000..d76f863a --- /dev/null +++ b/analysis/classes/TruthInteraction.py @@ -0,0 +1,31 @@ +import numpy as np +import pandas as pd + +from . import Interaction, TruthParticle + + +class TruthInteraction(Interaction): + """ + Analogous data structure for Interactions retrieved from true labels. + """ + def __init__(self, *args, **kwargs): + super(TruthInteraction, self).__init__(*args, **kwargs) + self.match = [] + self._match_counts = {} + + def check_validity(self): + for p in self.particles: + assert isinstance(p, TruthParticle) + + def __repr__(self): + + self.get_particles_summary() + msg = "TruthInteraction {}, Vertex: x={:.2f}, y={:.2f}, z={:.2f}\n"\ + "-----------------------------------------------\n".format( + self.id, self.vertex[0], self.vertex[1], self.vertex[2]) + return msg + self.particles_summary + + def __str__(self): + return "TruthInteraction(id={}, vertex={}, nu_id={}, Particles={})".format( + self.id, str(self.vertex), self.nu_id, str(self.particle_ids)) + diff --git a/analysis/classes/TruthParticle.py b/analysis/classes/TruthParticle.py new file mode 100644 index 00000000..7757b13b --- /dev/null +++ b/analysis/classes/TruthParticle.py @@ -0,0 +1,70 @@ +import numpy as np +import pandas as pd + +from typing import Counter, List, Union +from . import Particle + + +class TruthParticle(Particle): + ''' + Data structure mirroring , reserved for true particles + derived from true labels / true MC information. + + Attributes + ---------- + See documentation for shared attributes. + Below are attributes exclusive to TruthParticle + + asis: larcv.Particle C++ object (Optional) + Raw larcv.Particle C++ object as retrived from parse_particles_asis. + match: List[int] + List of Particle IDs that match to this TruthParticle + coords_noghost: + Coordinates using true labels (not adapted to deghosting output) + depositions_noghost: + Depositions using true labels (not adapted to deghosting output), in MeV. + depositions_MeV: + Similar as `depositions`, i.e. using adapted true labels. + Using true MeV energy deposits instead of rescaled ADC units. 
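Matching between true and reconstructed particles is overlap-based; `purity_efficiency` (defined below) reduces to set intersections on `voxel_indices`. With toy indices:

import numpy as np

true_idx = np.array([1, 2, 3, 4, 5])   # voxel_indices of a TruthParticle
pred_idx = np.array([3, 4, 5, 6])      # voxel_indices of a candidate reconstructed Particle
overlap = len(np.intersect1d(true_idx, pred_idx))
print({'purity': overlap / len(pred_idx),       # 3/4 = 0.75
       'efficiency': overlap / len(true_idx)})  # 3/5 = 0.60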
+ ''' + def __init__(self, *args, particle_asis=None, coords_noghost=None, depositions_noghost=None, + depositions_MeV=None, **kwargs): + super(TruthParticle, self).__init__(*args, **kwargs) + self.asis = particle_asis + self.match = [] + self._match_counts = {} + self.coords_noghost = coords_noghost + self.depositions_noghost = depositions_noghost + self.depositions_MeV = depositions_MeV + + def __repr__(self): + fmt = "TruthParticle( Image ID={:<3} | Particle ID={:<3} | Semantic_type: {:<15}"\ + " | PID: {:<8} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} | Volume: {:<2} )" + msg = fmt.format(self.image_id, self.id, + self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", + self.pid_keys[self.pid] if self.pid in self.pid_keys else "None", + self.is_primary, + self.interaction_id, + self.points.shape[0], + self.volume) + return msg + + + def is_contained(self, spatial_size): + + p = self.particle_asis + check_contained = p.position().x() >= 0 and p.position().x() <= spatial_size \ + and p.position().y() >= 0 and p.position().y() <= spatial_size \ + and p.position().z() >= 0 and p.position().z() <= spatial_size \ + and p.end_position().x() >= 0 and p.end_position().x() <= spatial_size \ + and p.end_position().y() >= 0 and p.end_position().y() <= spatial_size \ + and p.end_position().z() >= 0 and p.end_position().z() <= spatial_size + return check_contained + + def purity_efficiency(self, other_particle): + overlap = len(np.intersect1d(self.voxel_indices, other_particle.voxel_indices)) + return { + "purity": overlap / len(other_particle.voxel_indices), + "efficiency": overlap / len(self.voxel_indices) + } + diff --git a/analysis/classes/TruthParticleFragment.py b/analysis/classes/TruthParticleFragment.py new file mode 100644 index 00000000..9df9366b --- /dev/null +++ b/analysis/classes/TruthParticleFragment.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd + +from typing import Counter, List, Union +from . 
import ParticleFragment + + +class TruthParticleFragment(ParticleFragment): + + def __init__(self, *args, depositions_MeV=None, **kwargs): + super(TruthParticleFragment, self).__init__(*args, **kwargs) + self.depositions_MeV = depositions_MeV + + def __repr__(self): + fmt = "TruthParticleFragment( Image ID={:<3} | Fragment ID={:<3} | Semantic_type: {:<15}"\ + " | Group ID: {:<3} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} | Volume: {:<2})" + msg = fmt.format(self.image_id, self.id, + self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", + self.group_id, + self.is_primary, + self.interaction_id, + self.points.shape[0], + self.volume) + return msg diff --git a/analysis/classes/__init__.py b/analysis/classes/__init__.py index e69de29b..6902e665 100644 --- a/analysis/classes/__init__.py +++ b/analysis/classes/__init__.py @@ -0,0 +1,6 @@ +from .Particle import Particle +from .ParticleFragment import ParticleFragment +from .TruthParticle import TruthParticle +from .TruthParticleFragment import TruthParticleFragment +from .Interaction import Interaction +from .TruthInteraction import TruthInteraction diff --git a/analysis/classes/particle.py b/analysis/classes/particle.py index 1645307f..8d4dc2f0 100644 --- a/analysis/classes/particle.py +++ b/analysis/classes/particle.py @@ -8,348 +8,7 @@ from pprint import pprint - -class Particle: - ''' - Data Structure for managing Particle-level - full chain output information - - Attributes - ---------- - id: int - Unique ID of the particle - points: (N, 3) np.array - 3D coordinates of the voxels that belong to this particle - size: int - Total number of voxels that belong to this particle - depositions: (N, 1) np.array - Array of energy deposition values for each voxel (rescaled, ADC units) - voxel_indices: (N, ) np.array - Numeric integer indices of voxel positions of this particle - with respect to the total array of point in a single image. - semantic_type: int - Semantic type (shower fragment (0), track (1), - michel (2), delta (3), lowE (4)) of this particle. - pid: int - PDG Type (Photon (0), Electron (1), Muon (2), - Charged Pion (3), Proton (4)) of this particle. - pid_conf: float - Softmax probability score for the most likely pid prediction - interaction_id: int - Integer ID of the particle's parent interaction - image_id: int - ID of the image in which this particle resides in - is_primary: bool - Indicator whether this particle is a primary from an interaction. 
- match: List[int] - List of TruthParticle IDs for which this particle is matched to - - startpoint: (1,3) np.array - (1, 3) array of particle's startpoint, if it could be assigned - endpoint: (1,3) np.array - (1, 3) array of particle's endpoint, if it could be assigned - ''' - def __init__(self, coords, group_id, semantic_type, interaction_id, - pid, image_id=0, voxel_indices=None, depositions=None, **kwargs): - self.id = group_id - self.points = coords - self.size = coords.shape[0] - self.depositions = depositions # In rescaled ADC - self.voxel_indices = voxel_indices - self.semantic_type = semantic_type - self.pid = pid - self.pid_conf = kwargs.get('pid_conf', None) - self.interaction_id = interaction_id - self.image_id = image_id - self.is_primary = kwargs.get('is_primary', False) - self.match = [] - self._match_counts = {} -# self.fragments = fragment_ids - self.semantic_keys = { - 0: 'Shower Fragment', - 1: 'Track', - 2: 'Michel Electron', - 3: 'Delta Ray', - 4: 'LowE Depo' - } - - self.pid_keys = { - -1: 'None', - 0: 'Photon', - 1: 'Electron', - 2: 'Muon', - 3: 'Pion', - 4: 'Proton' - } - - self.sum_edep = np.sum(self.depositions) - - def __str__(self): - return self.__repr__() - - def __repr__(self): - fmt = "Particle( Image ID={:<3} | Particle ID={:<3} | Semantic_type: {:<15}"\ - " | PID: {:<8} | Primary: {:<2} | Score = {:.2f}% | Interaction ID: {:<2} | Size: {:<5} )" - msg = fmt.format(self.image_id, self.id, - self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", - self.pid_keys[self.pid] if self.pid in self.pid_keys else "None", - self.is_primary, - self.pid_conf * 100, - self.interaction_id, - self.points.shape[0]) - return msg - - -class ParticleFragment(Particle): - ''' - Data structure for managing fragment-level - full chain output information - - Attributes - ---------- - See documentation for shared attributes. - Below are attributes exclusive to ParticleFragment - - id: int - fragment ID of this particle fragment (different from particle id) - group_id: int - Group ID (alias for Particle ID) for which this fragment belongs to. - is_primary: bool - If True, then this particle fragment corresponds to - a primary ionization trajectory within the group of fragments that - compose a particle. 
- ''' - def __init__(self, coords, fragment_id, semantic_type, interaction_id, - group_id, image_id=0, voxel_indices=None, - depositions=None, **kwargs): - self.id = fragment_id - self.points = coords - self.size = coords.shape[0] - self.depositions = depositions # In rescaled ADC - self.voxel_indices = voxel_indices - self.semantic_type = semantic_type - self.group_id = group_id - self.interaction_id = interaction_id - self.image_id = image_id - self.is_primary = kwargs.get('is_primary', False) - self.semantic_keys = { - 0: 'Shower Fragment', - 1: 'Track', - 2: 'Michel Electron', - 3: 'Delta Ray', - 4: 'LowE Depo' - } - - def __str__(self): - return self.__repr__() - - def __repr__(self): - fmt = "ParticleFragment( Image ID={:<3} | Fragment ID={:<3} | Semantic_type: {:<15}"\ - " | Group ID: {:<3} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} )" - msg = fmt.format(self.image_id, self.id, - self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", - self.group_id, - self.is_primary, - self.interaction_id, - self.points.shape[0]) - return msg - - -class TruthParticleFragment(ParticleFragment): - - def __init__(self, *args, depositions_MeV=None, **kwargs): - super(TruthParticleFragment, self).__init__(*args, **kwargs) - self.depositions_MeV = depositions_MeV - - def __repr__(self): - fmt = "TruthParticleFragment( Image ID={:<3} | Fragment ID={:<3} | Semantic_type: {:<15}"\ - " | Group ID: {:<3} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} )" - msg = fmt.format(self.image_id, self.id, - self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", - self.group_id, - self.is_primary, - self.interaction_id, - self.points.shape[0]) - return msg - - -class TruthParticle(Particle): - ''' - Data structure mirroring , reserved for true particles - derived from true labels / true MC information. - - Attributes - ---------- - See documentation for shared attributes. - Below are attributes exclusive to TruthParticle - - asis: larcv.Particle C++ object (Optional) - Raw larcv.Particle C++ object as retrived from parse_particles_asis. - match: List[int] - List of Particle IDs that match to this TruthParticle - coords_noghost: - Coordinates using true labels (not adapted to deghosting output) - depositions_noghost: - Depositions using true labels (not adapted to deghosting output), in MeV. - depositions_MeV: - Similar as `depositions`, i.e. using adapted true labels. - Using true MeV energy deposits instead of rescaled ADC units. 
- ''' - def __init__(self, *args, particle_asis=None, coords_noghost=None, depositions_noghost=None, - depositions_MeV=None, **kwargs): - super(TruthParticle, self).__init__(*args, **kwargs) - self.asis = particle_asis - self.match = [] - self._match_counts = {} - self.coords_noghost = coords_noghost - self.depositions_noghost = depositions_noghost - self.depositions_MeV = depositions_MeV - - def __repr__(self): - fmt = "TruthParticle( Image ID={:<3} | Particle ID={:<3} | Semantic_type: {:<15}"\ - " | PID: {:<8} | Primary: {:<2} | Interaction ID: {:<2} | Size: {:<5} )" - msg = fmt.format(self.image_id, self.id, - self.semantic_keys[self.semantic_type] if self.semantic_type in self.semantic_keys else "None", - self.pid_keys[self.pid] if self.pid in self.pid_keys else "None", - self.is_primary, - self.interaction_id, - self.points.shape[0]) - return msg - - - def is_contained(self, spatial_size): - - p = self.particle_asis - check_contained = p.position().x() >= 0 and p.position().x() <= spatial_size \ - and p.position().y() >= 0 and p.position().y() <= spatial_size \ - and p.position().z() >= 0 and p.position().z() <= spatial_size \ - and p.end_position().x() >= 0 and p.end_position().x() <= spatial_size \ - and p.end_position().y() >= 0 and p.end_position().y() <= spatial_size \ - and p.end_position().z() >= 0 and p.end_position().z() <= spatial_size - return check_contained - - def purity_efficiency(self, other_particle): - overlap = len(np.intersect1d(self.voxel_indices, other_particle.voxel_indices)) - return { - "purity": overlap / len(other_particle.voxel_indices), - "efficiency": overlap / len(self.voxel_indices) - } - -class Interaction: - """ - Data structure for managing interaction-level - full chain output information. - - Attributes - ---------- - id: int - Unique ID (Interaction ID) of this interaction. - particles: List[Particle] - List of objects that belong to this Interaction. - vertex: (1,3) np.array (Optional) - 3D coordinates of the predicted interaction vertex - nu_id: int (Optional, TODO) - Label indicating whether this interaction is a neutrino interaction - WARNING: The nu_id label is most likely unreliable. Don't use this - in reconstruction (used for debugging) - num_particles: int - total number of particles in this interaction. 
- """ - def __init__(self, interaction_id, particles, vertex=None, nu_id=-1): - self.id = interaction_id - self.particles = particles - self.match = [] - self._match_counts = {} - self.check_validity() - # Voxel indices of an interaction is defined by the union of - # constituent particle voxel indices - self.voxel_indices = [] - for p in self.particles: - self.voxel_indices.append(p.voxel_indices) - assert p.interaction_id == interaction_id - self.voxel_indices = np.hstack(self.voxel_indices) - self.size = self.voxel_indices.shape[0] - self.num_particles = len(self.particles) - - self.pid_keys = { - 0: 'Photon', - 1: 'Electron', - 2: 'Muon', - 3: 'Pion', - 4: 'Proton' - } - - self.get_particles_summary() - - self.vertex = vertex - if self.vertex is None: - self.vertex = np.array([-1, -1, -1]) - - self.nu_id = nu_id - - self.particle_ids = [p.id for p in self.particles] - self.particle_counts = Counter({ self.pid_keys[i] : 0 for i in range(len(self.pid_keys))}) - self.particle_counts.update([self.pid_keys[p.pid] for p in self.particles]) - - self.primary_particle_counts = Counter({ self.pid_keys[i] : 0 for i in range(len(self.pid_keys))}) - self.primary_particle_counts.update([self.pid_keys[p.pid] for p in self.particles if p.is_primary]) - - if sum(self.primary_particle_counts.values()) == 0: - # print("Interaction {} has no primary particles!".format(self.id)) - self.is_valid = False - else: - self.is_valid = True - - def check_validity(self): - for p in self.particles: - assert isinstance(p, Particle) - - def get_particles_summary(self): - self.particles_summary = "" - self.particles = sorted(self.particles, key=lambda x: x.id) - for p in self.particles: - pmsg = " - Particle {}: PID = {}, Size = {}, Match = {} \n".format( - p.id, self.pid_keys[p.pid], p.points.shape[0], str(p.match)) - self.particles_summary += pmsg - - - def __repr__(self): - - self.get_particles_summary() - msg = "Interaction {}, Valid: {}, Vertex: x={:.2f}, y={:.2f}, z={:.2f}\n"\ - "--------------------------------------------------------------------\n".format( - self.id, self.is_valid, self.vertex[0], self.vertex[1], self.vertex[2]) - return msg + self.particles_summary - - def __str__(self): - return "Interaction(id={}, vertex={}, nu_id={}, Particles={})".format( - self.id, str(self.vertex), self.nu_id, str(self.particle_ids)) - - -class TruthInteraction(Interaction): - """ - Analogous data structure for Interactions retrieved from true labels. - """ - def __init__(self, *args, **kwargs): - super(TruthInteraction, self).__init__(*args, **kwargs) - self.match = [] - self._match_counts = {} - - def check_validity(self): - for p in self.particles: - assert isinstance(p, TruthParticle) - - def __repr__(self): - - self.get_particles_summary() - msg = "TruthInteraction {}, Vertex: x={:.2f}, y={:.2f}, z={:.2f}\n"\ - "-----------------------------------------------\n".format( - self.id, self.vertex[0], self.vertex[1], self.vertex[2]) - return msg + self.particles_summary - - def __str__(self): - return "TruthInteraction(id={}, vertex={}, nu_id={}, Particles={})".format( - self.id, str(self.vertex), self.nu_id, str(self.particle_ids)) +from . 
import Particle, TruthParticle, Interaction, TruthInteraction def matrix_counts(particles_x, particles_y): diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index 0536ff9a..1f0d0158 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -8,7 +8,10 @@ from collections import defaultdict from scipy.special import softmax -from analysis.classes.particle import * +from analysis.classes import Particle, ParticleFragment, TruthParticleFragment, \ + TruthParticle, Interaction, TruthInteraction +from analysis.classes.particle import matrix_counts, matrix_iou, \ + match_particles_fn, match_interactions_fn, group_particles_to_interactions_fn from analysis.algorithms.point_matching import * from mlreco.utils.groups import type_labels as TYPE_LABELS @@ -16,6 +19,7 @@ from mlreco.utils.deghosting import deghost_labels_and_predictions, compute_rescaled_charge from mlreco.utils.gnn.cluster import get_cluster_label +from mlreco.iotools.collates import VolumeBoundaries class FullChainPredictor: @@ -52,6 +56,7 @@ class FullChainPredictor: ''' def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False): self.module_config = cfg['model']['modules'] + self.cfg = cfg # Handle deghosting before anything and save deghosting specific # quantities separately from data_blob and result @@ -99,8 +104,18 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False): self.volume_boundaries[1, :] = (self.volume_boundaries[1, :] - min_y) / size_voxel_y self.volume_boundaries[2, :] = (self.volume_boundaries[2, :] - min_z) / size_voxel_z + # Determine whether we need to account for several distinct volumes + # split over "virtual" batch ids + boundaries = cfg['iotool'].get('collate', {}).get('boundaries', None) + if boundaries is not None: + self.vb = VolumeBoundaries(boundaries) + self._num_volumes = self.vb.num_volumes() + else: + self.vb = None + self._num_volumes = 1 + def __repr__(self): - msg = "FullChainEvaluator(num_images={})".format(self.num_images) + msg = "FullChainEvaluator(num_images={})".format(int(self.num_images/self._num_volumes)) return msg @@ -381,11 +396,59 @@ def _fit_predict_vertex_info(self, entry, inter_idx): return vertex_info + def _get_entries(self, entry, volume): + """ + Make a list of actual entries in the batch ids. This accounts for potential + virtual batch ids in case we used volume boundaries to process several volumes + separately. + + Parameters + ========== + entry: int + Which entry of the original dataset you want to access. + volume: int or None + Which volume you want to access. None means all of them. + + Returns + ======= + list + List of integers = actual batch ids in the tensors (potentially virtual batch ids). + """ + entries = [entry] # default behavior + if self.vb is not None: # in case we defined virtual batch ids (volume boundaries) + entries = self.vb.virtual_batch_ids(entry) # these are ALL the virtual batch ids corresponding to this entry + if volume is not None: # maybe we wanted to select a specific volume + entries = [entries[volume]] + return entries + + def _check_volume(self, volume): + """ + Basic sanity check that the volume given makes sense given the config. 
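The volume bookkeeping used throughout the rewritten methods assumes one block of consecutive virtual batch ids per original entry, which is consistent with the `entry % self._num_volumes` lookups below. A self-contained sketch of that convention (illustrative only, not the actual VolumeBoundaries API):

# Assumption: entry i with N volumes maps to virtual batch ids [i*N, ..., i*N + N - 1]
def virtual_batch_ids(entry, num_volumes):
    return [entry * num_volumes + v for v in range(num_volumes)]

num_volumes = 2
entries = virtual_batch_ids(3, num_volumes)   # [6, 7]
print([e % num_volumes for e in entries])     # [0, 1] -> volume index of each virtual id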
+ + Parameters + ========== + volume: int or None + + Returns + ======= + Nothing + """ + if volume is not None and self.vb is None: + raise Exception("You need to specify volume boundaries in your I/O config (collate section).") + if volume is not None: + assert isinstance(volume, (int, np.int64, np.int32)) and volume >= 0 + + def _translate(self, voxels, volume): + if self.vb is None: + return voxels + else: + return self.vb.translate(voxels, volume) def get_fragments(self, entry, only_primaries=False, min_particle_voxel_count=-1, attaching_threshold=2, - semantic_type=None, verbose=False, true_id=False) -> List[Particle]: + semantic_type=None, verbose=False, + true_id=False, volume=None) -> List[Particle]: ''' Method for retriving fragment list for given batch index. @@ -394,119 +457,141 @@ def get_fragments(self, entry, only_primaries=False, Method also performs startpoint prediction for shower fragments. - Inputs: - - entry: Batch number to retrieve example. - - semantic_type (optional): if True, only ppn candiates with the + Parameters + ========== + entry: int + Batch number to retrieve example. + only_primaries: bool, default False + min_particle_voxel_count: int, default -1 + attaching_threshold: float, default 2 + threshold distance to attach ppn point to particle. + semantic_type: int, default None + If True, only ppn candiates with the same predicted semantic type will be matched to its corresponding particle. - - threshold (float, optional): threshold distance to attach - ppn point to particle. + verbose: bool, default False + true_id: bool, default False + volume: int, default None - Returns: - - out: List of instances (see Particle class definition). + Returns + ======= + list + List of instances (see Particle class definition). ''' + self._check_volume(volume) + if min_particle_voxel_count < 0: min_particle_voxel_count = self.min_particle_voxel_count - point_cloud = self.data_blob['input_data'][entry][:, 1:4] - depositions = self.result['input_rescaled'][entry][:, 4] - fragments = self.result['fragments'][entry] - fragments_seg = self.result['fragments_seg'][entry] + entries = self._get_entries(entry, volume) - shower_mask = fragments_seg == 0 - shower_frag_primary = np.argmax(self.result['shower_node_pred'][entry], axis=1) + out_fragment_list = [] + for entry in entries: + volume = entry % self._num_volumes - if 'shower_node_features' in self.result: - shower_node_features = self.result['shower_node_features'][entry] - if 'track_node_features' in self.result: - track_node_features = self.result['track_node_features'][entry] + point_cloud = self.data_blob['input_data'][entry][:, 1:4] + depositions = self.result['input_rescaled'][entry][:, 4] + fragments = self.result['fragments'][entry] + fragments_seg = self.result['fragments_seg'][entry] - assert len(fragments_seg) == len(fragments) + shower_mask = np.isin(fragments_seg, self.module_config['grappa_shower']['base']['node_type']) + shower_frag_primary = np.argmax(self.result['shower_node_pred'][entry], axis=1) - temp = [] + if 'shower_node_features' in self.result: + shower_node_features = self.result['shower_node_features'][entry] + if 'track_node_features' in self.result: + track_node_features = self.result['track_node_features'][entry] - if ('inter_group_pred' in self.result) and ('particles' in self.result) and len(fragments) > 0: + assert len(fragments_seg) == len(fragments) - group_labels = self._fit_predict_groups(entry) - inter_labels = self._fit_predict_interaction_labels(entry) - group_ids = 
get_cluster_label(group_labels.reshape(-1, 1), fragments, column=0) - inter_ids = get_cluster_label(inter_labels.reshape(-1, 1), fragments, column=0) + temp = [] + + if ('inter_group_pred' in self.result) and ('particles' in self.result) and len(fragments) > 0: + + group_labels = self._fit_predict_groups(entry) + inter_labels = self._fit_predict_interaction_labels(entry) + group_ids = get_cluster_label(group_labels.reshape(-1, 1), fragments, column=0) + inter_ids = get_cluster_label(inter_labels.reshape(-1, 1), fragments, column=0) + + else: + group_ids = np.ones(len(fragments)).astype(int) * -1 + inter_ids = np.ones(len(fragments)).astype(int) * -1 - else: - group_ids = np.ones(len(fragments)).astype(int) * -1 - inter_ids = np.ones(len(fragments)).astype(int) * -1 - - if true_id: - true_fragment_labels = self.data_blob['cluster_label'][entry][:, 5] - - - for i, p in enumerate(fragments): - voxels = point_cloud[p] - seg_label = fragments_seg[i] - part = ParticleFragment(voxels, i, seg_label, - interaction_id=inter_ids[i], - group_id=group_ids[i], - image_id=entry, - voxel_indices=p, - depositions=depositions[p], - is_primary=False, - pid_conf=-1, - alias='Fragment') - temp.append(part) if true_id: - fid = true_fragment_labels[p] - fids, counts = np.unique(fid.astype(int), return_counts=True) - part.true_ids = fids - part.true_counts = counts - - # Label shower fragments as primaries and attach startpoint - shower_counter = 0 - for p in temp: - if p.semantic_type == 0: + true_fragment_labels = self.data_blob['cluster_label'][entry][:, 5] + + + for i, p in enumerate(fragments): + voxels = point_cloud[p] + seg_label = fragments_seg[i] + part = ParticleFragment(self._translate(voxels, volume), + i, seg_label, + interaction_id=inter_ids[i], + group_id=group_ids[i], + image_id=entry, + voxel_indices=p, + depositions=depositions[p], + is_primary=False, + pid_conf=-1, + alias='Fragment', + volume=volume) + temp.append(part) + if true_id: + fid = true_fragment_labels[p] + fids, counts = np.unique(fid.astype(int), return_counts=True) + part.true_ids = fids + part.true_counts = counts + + # Label shower fragments as primaries and attach startpoint + shower_counter = 0 + for p in np.array(temp)[shower_mask]: is_primary = shower_frag_primary[shower_counter] p.is_primary = bool(is_primary) p.startpoint = shower_node_features[shower_counter][19:22] # p.group_id = int(shower_group_pred[shower_counter]) shower_counter += 1 - assert shower_counter == shower_frag_primary.shape[0] - - # Attach endpoint to track fragments - track_counter = 0 - for p in temp: - if p.semantic_type == 1: - # p.group_id = int(track_group_pred[track_counter]) - p.startpoint = track_node_features[track_counter][19:22] - p.endpoint = track_node_features[track_counter][22:25] - track_counter += 1 - # assert track_counter == track_group_pred.shape[0] - - # Apply fragment voxel cut - out = [] - for p in temp: - if p.points.shape[0] < min_particle_voxel_count: - continue - out.append(p) + assert shower_counter == shower_frag_primary.shape[0] + + # Attach endpoint to track fragments + track_counter = 0 + for p in temp: + if p.semantic_type == 1: + # p.group_id = int(track_group_pred[track_counter]) + p.startpoint = track_node_features[track_counter][19:22] + p.endpoint = track_node_features[track_counter][22:25] + track_counter += 1 + # assert track_counter == track_group_pred.shape[0] + + # Apply fragment voxel cut + out = [] + for p in temp: + if p.points.shape[0] < min_particle_voxel_count: + continue + out.append(p) - # Check 
primaries and assign ppn points - if only_primaries: - out = [p for p in out if p.is_primary] + # Check primaries and assign ppn points + if only_primaries: + out = [p for p in out if p.is_primary] - if semantic_type is not None: - out = [p for p in out if p.semantic_type == semantic_type] + if semantic_type is not None: + out = [p for p in out if p.semantic_type == semantic_type] - if len(out) == 0: - return out + if len(out) == 0: + return out - ppn_results = self._fit_predict_ppn(entry) - match_points_to_particles(ppn_results, out, - ppn_distance_threshold=attaching_threshold) + ppn_results = self._fit_predict_ppn(entry) + match_points_to_particles(ppn_results, out, + ppn_distance_threshold=attaching_threshold) - return out + out_fragment_list.extend(out) + + return out_fragment_list def get_particles(self, entry, only_primaries=True, min_particle_voxel_count=-1, - attaching_threshold=2) -> List[Particle]: + attaching_threshold=2, + volume=None) -> List[Particle]: ''' Method for retriving particle list for given batch index. @@ -527,104 +612,124 @@ def get_particles(self, entry, only_primaries=True, with the closest Hausdorff distance to the particle point cloud (smallest point-to-set distance) - Inputs: - - entry: Batch number to retrieve example. - - primaries: If set to True, only retrieve predicted primaries. - Returns: - - out: List of instances (see Particle class definition). + Parameters + ========== + entry: int + Batch number to retrieve example. + only_primaries: bool, default True + If set to True, only retrieve predicted primaries. + min_particle_voxel_count: int, default -1 + attaching_threshold: int, default 2 + volume: int, default None + + Returns + ======= + list + List of instances (see Particle class definition). ''' + self._check_volume(volume) + if min_particle_voxel_count < 0: min_particle_voxel_count = self.min_particle_voxel_count - point_cloud = self.data_blob['input_data'][entry][:, 1:4] - depositions = self.result['input_rescaled'][entry][:, 4] - particles = self.result['particles'][entry] - # inter_group_pred = self.result['inter_group_pred'][entry] - #print(point_cloud.shape, depositions.shape, len(particles)) - particles_seg = self.result['particles_seg'][entry] + entries = self._get_entries(entry, volume) - type_logits = self.result['node_pred_type'][entry] - input_node_features = [None] * type_logits.shape[0] - if 'particle_node_features' in self.result: - input_node_features = self.result['particle_node_features'][entry] - pids = np.argmax(type_logits, axis=1) - - out = [] - if point_cloud.shape[0] == 0: - return out - assert len(particles_seg) == len(particles) - assert len(pids) == len(particles) - assert len(input_node_features) == len(particles) - assert point_cloud.shape[0] == depositions.shape[0] + out_particle_list = [] + for entry in entries: + volume = entry % self._num_volumes - node_pred_vtx = self.result['node_pred_vtx'][entry] + point_cloud = self.data_blob['input_data'][entry][:, 1:4] + depositions = self.result['input_rescaled'][entry][:, 4] + particles = self.result['particles'][entry] + # inter_group_pred = self.result['inter_group_pred'][entry] + #print(point_cloud.shape, depositions.shape, len(particles)) + particles_seg = self.result['particles_seg'][entry] - assert node_pred_vtx.shape[0] == len(particles) - - if ('inter_group_pred' in self.result) and ('particles' in self.result) and len(particles) > 0: - - assert len(self.result['inter_group_pred'][entry]) == len(particles) - inter_labels = 
self._fit_predict_interaction_labels(entry) - inter_ids = get_cluster_label(inter_labels.reshape(-1, 1), particles, column=0) - - else: - inter_ids = np.ones(len(particles)).astype(int) * -1 + type_logits = self.result['node_pred_type'][entry] + input_node_features = [None] * type_logits.shape[0] + if 'particle_node_features' in self.result: + input_node_features = self.result['particle_node_features'][entry] + pids = np.argmax(type_logits, axis=1) - for i, p in enumerate(particles): - voxels = point_cloud[p] - if voxels.shape[0] < min_particle_voxel_count: - continue - seg_label = particles_seg[i] - pid = pids[i] - if seg_label == 2 or seg_label == 3: - pid = 1 - interaction_id = inter_ids[i] - is_primary = bool(np.argmax(node_pred_vtx[i][3:])) - part = Particle(voxels, i, seg_label, interaction_id, - pid, - batch_id=entry, - voxel_indices=p, - depositions=depositions[p], - is_primary=is_primary, - pid_conf=softmax(type_logits[i])[pids[i]]) - - part._node_features = input_node_features[i] - out.append(part) + out = [] + if point_cloud.shape[0] == 0: + return out + assert len(particles_seg) == len(particles) + assert len(pids) == len(particles) + assert len(input_node_features) == len(particles) + assert point_cloud.shape[0] == depositions.shape[0] - if only_primaries: - out = [p for p in out if p.is_primary] + node_pred_vtx = self.result['node_pred_vtx'][entry] - if len(out) == 0: - return out + assert node_pred_vtx.shape[0] == len(particles) - ppn_results = self._fit_predict_ppn(entry) + if ('inter_group_pred' in self.result) and ('particles' in self.result) and len(particles) > 0: - # Get ppn candidates for particle - match_points_to_particles(ppn_results, out, - ppn_distance_threshold=attaching_threshold) + assert len(self.result['inter_group_pred'][entry]) == len(particles) + inter_labels = self._fit_predict_interaction_labels(entry) + inter_ids = get_cluster_label(inter_labels.reshape(-1, 1), particles, column=0) - # Attach startpoint and endpoint - # as done in full chain geometric encoder - for p in out: - if p.size < min_particle_voxel_count: - continue - if p.semantic_type == 0: - pt = p._node_features[19:22] - # Check startpoint is replicated - assert(np.sum( - np.abs(pt - p._node_features[22:25])) < 1e-12) - p.startpoint = pt - elif p.semantic_type == 1: - startpoint, endpoint = p._node_features[19:22], p._node_features[22:25] - p.startpoint = startpoint - p.endpoint = endpoint else: - continue + inter_ids = np.ones(len(particles)).astype(int) * -1 - return out + for i, p in enumerate(particles): + voxels = point_cloud[p] + if voxels.shape[0] < min_particle_voxel_count: + continue + seg_label = particles_seg[i] + pid = pids[i] + if seg_label == 2 or seg_label == 3: + pid = 1 + interaction_id = inter_ids[i] + is_primary = bool(np.argmax(node_pred_vtx[i][3:])) + part = Particle(self._translate(voxels, volume), + i, seg_label, interaction_id, + pid, + batch_id=entry, + voxel_indices=p, + depositions=depositions[p], + is_primary=is_primary, + pid_conf=softmax(type_logits[i])[pids[i]], + volume=volume) + + part._node_features = input_node_features[i] + out.append(part) + + if only_primaries: + out = [p for p in out if p.is_primary] + + if len(out) == 0: + return out + + ppn_results = self._fit_predict_ppn(entry) + + # Get ppn candidates for particle + match_points_to_particles(ppn_results, out, + ppn_distance_threshold=attaching_threshold) + + # Attach startpoint and endpoint + # as done in full chain geometric encoder + for p in out: + if p.size < min_particle_voxel_count: + 
continue + if p.semantic_type == 0: + pt = p._node_features[19:22] + # Check startpoint is replicated + assert(np.sum( + np.abs(pt - p._node_features[22:25])) < 1e-12) + p.startpoint = pt + elif p.semantic_type == 1: + startpoint, endpoint = p._node_features[19:22], p._node_features[22:25] + p.startpoint = startpoint + p.endpoint = endpoint + else: + continue + out_particle_list.extend(out) + return out_particle_list - def get_interactions(self, entry, drop_nonprimary_particles=True) -> List[Interaction]: + + def get_interactions(self, entry, drop_nonprimary_particles=True, volume=None) -> List[Interaction]: ''' Method for retriving interaction list for given batch index. @@ -644,36 +749,63 @@ def get_interactions(self, entry, drop_nonprimary_particles=True) -> List[Intera Returns: - out: List of instances (see particle.Interaction). ''' - particles = self.get_particles(entry, only_primaries=drop_nonprimary_particles) - out = group_particles_to_interactions_fn(particles) - for ia in out: - ia.vertex = self._fit_predict_vertex_info(entry, ia.id) - return out + self._check_volume(volume) + + entries = self._get_entries(entry, volume) + + out_interaction_list = [] + for e in entries: + volume = e % self._num_volumes + particles = self.get_particles(entry, only_primaries=drop_nonprimary_particles, volume=volume) + out = group_particles_to_interactions_fn(particles) + for ia in out: + ia.vertex = self._fit_predict_vertex_info(e, ia.id) + ia.volume = volume + out_interaction_list.extend(out) + + return out_interaction_list - def fit_predict_labels(self, entry): + def fit_predict_labels(self, entry, volume=None): ''' Predict all labels of a given batch index . We define to be 1d tensors that annotate voxels. ''' - pred_seg = self._fit_predict_semantics(entry) - pred_fragments = self._fit_predict_fragments(entry) - pred_groups = self._fit_predict_groups(entry) - pred_interaction_labels = self._fit_predict_interaction_labels(entry) - pred_pids = self._fit_predict_pids(entry) - - pred = { - 'segment': pred_seg, - 'fragment': pred_fragments, - 'group': pred_groups, - 'interaction': pred_interaction_labels, - 'pdg': pred_pids + self._check_volume(volume) + entries = self._get_entries(entry, volume) + + all_pred = { + 'segment': [], + 'fragment': [], + 'group': [], + 'interaction': [], + 'pdg': [] } + for entry in entries: + pred_seg = self._fit_predict_semantics(entry) + pred_fragments = self._fit_predict_fragments(entry) + pred_groups = self._fit_predict_groups(entry) + pred_interaction_labels = self._fit_predict_interaction_labels(entry) + pred_pids = self._fit_predict_pids(entry) + + pred = { + 'segment': pred_seg, + 'fragment': pred_fragments, + 'group': pred_groups, + 'interaction': pred_interaction_labels, + 'pdg': pred_pids + } + + for key in pred: + if len(all_pred[key]) == 0: + all_pred[key] = pred[key] + else: + all_pred[key] = np.concatenate([all_pred[key], pred[key]], axis=0) - self._pred = pred + self._pred = all_pred - return pred + return all_pred def fit_predict(self, **kwargs): @@ -693,7 +825,7 @@ def fit_predict(self, **kwargs): labels = [] list_particles, list_interactions = [], [] - for entry in range(self.num_images): + for entry in range(int(self.num_images / self._num_volumes)): pred_dict = self.fit_predict_labels(entry) labels.append(pred_dict) @@ -778,17 +910,56 @@ def __init__(self, data_blob, result, cfg, processor_cfg={}, **kwargs): super(FullChainEvaluator, self).__init__(data_blob, result, cfg, processor_cfg, **kwargs) self.michel_primary_ionization_only = 
processor_cfg.get('michel_primary_ionization_only', False) - def get_true_label(self, entry, name, schema='cluster_label'): + def get_true_label(self, entry, name, schema='cluster_label', volume=None): + """ + Retrieve tensor in data blob, labelled with `schema`. + + Parameters + ========== + entry: int + name: str + Must be a predefined name within `['segment', 'fragment', 'group', + 'interaction', 'pdg', 'nu']`. + schema: str + Key for dataset schema to retrieve the info from. + volume: int, default None + + Returns + ======= + np.array + """ if name not in self.LABEL_TO_COLUMN: raise KeyError("Invalid label identifier name: {}. "\ "Available column names = {}".format( name, str(list(self.LABEL_TO_COLUMN.keys())))) column_idx = self.LABEL_TO_COLUMN[name] - return self.data_blob[schema][entry][:, column_idx] + self._check_volume(volume) + + entries = self._get_entries(entry, volume) + out = [] + for entry in entries: + out.append(self.data_blob[schema][entry][:, column_idx]) + return np.concatenate(out, axis=0) + + + def get_predicted_label(self, entry, name, volume=None): + """ + Returns predicted quantities to label a plot. + + Parameters + ========== + entry: int + name: str + Must be a predefined name within `['segment', 'fragment', 'group', + 'interaction', 'pdg', 'nu']`. + volume: int, default None - def get_predicted_label(self, entry, name): - pred = self.fit_predict_labels(entry) + Returns + ======= + np.array + """ + pred = self.fit_predict_labels(entry, volume=volume) return pred[name] @@ -814,93 +985,104 @@ def _apply_true_voxel_cut(self, entry): return set(particles_exclude) - def get_true_fragments(self, entry, verbose=False) -> List[TruthParticleFragment]: + def get_true_fragments(self, entry, verbose=False, volume=None) -> List[TruthParticleFragment]: ''' Get list of instances for given batch id. 
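When a label column is not unique across a fragment's voxels, the code below falls back to a majority vote; the recurring pattern is:

import numpy as np

values = np.array([7, 7, 7, 3])                 # e.g. group IDs of one fragment's voxels
ids, counts = np.unique(values, return_counts=True)
label = ids[counts.argmax()] if ids.shape[0] > 1 else ids[0]
print(label)  # 7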
''' - # Both are "adapted" labels - labels = self.data_blob['cluster_label'][entry] - segment_label = self.data_blob['segment_label'][entry][:, -1] - rescaled_input_charge = self.result['input_rescaled'][entry][:, 4] - - fragment_ids = set(list(np.unique(labels[:, 5]).astype(int))) - fragments = [] - - for fid in fragment_ids: - mask = labels[:, 5] == fid - - semantic_type, counts = np.unique(labels[:, -1][mask], return_counts=True) - if semantic_type.shape[0] > 1: - if verbose: - print("Semantic Type of Fragment {} is not "\ - "unique: {}, {}".format(fid, - str(semantic_type), - str(counts))) - perm = counts.argmax() - semantic_type = semantic_type[perm] - else: - semantic_type = semantic_type[0] - - points = labels[mask][:, 1:4] - size = points.shape[0] - depositions = rescaled_input_charge[mask] - depositions_MeV = labels[mask][:, 4] - voxel_indices = np.where(mask)[0] - - group_id, counts = np.unique(labels[:, 6][mask].astype(int), return_counts=True) - if group_id.shape[0] > 1: - if verbose: - print("Group ID of Fragment {} is not "\ - "unique: {}, {}".format(fid, - str(group_id), - str(counts))) - perm = counts.argmax() - group_id = group_id[perm] - else: - group_id = group_id[0] - - interaction_id, counts = np.unique(labels[:, 7][mask].astype(int), return_counts=True) - if interaction_id.shape[0] > 1: - if verbose: - print("Interaction ID of Fragment {} is not "\ - "unique: {}, {}".format(fid, - str(interaction_id), - str(counts))) - perm = counts.argmax() - interaction_id = interaction_id[perm] - else: - interaction_id = interaction_id[0] - - - is_primary, counts = np.unique(labels[:, -2][mask].astype(bool), return_counts=True) - if is_primary.shape[0] > 1: - if verbose: - print("Primary label of Fragment {} is not "\ - "unique: {}, {}".format(fid, - str(is_primary), - str(counts))) - perm = counts.argmax() - is_primary = is_primary[perm] - else: - is_primary = is_primary[0] + self._check_volume(volume) + + entries = self._get_entries(entry, volume) + + out_fragments_list = [] + for entry in entries: + volume = entry % self._num_volumes + + # Both are "adapted" labels + labels = self.data_blob['cluster_label'][entry] + segment_label = self.data_blob['segment_label'][entry][:, -1] + rescaled_input_charge = self.result['input_rescaled'][entry][:, 4] + + fragment_ids = set(list(np.unique(labels[:, 5]).astype(int))) + fragments = [] + + for fid in fragment_ids: + mask = labels[:, 5] == fid + + semantic_type, counts = np.unique(labels[:, -1][mask], return_counts=True) + if semantic_type.shape[0] > 1: + if verbose: + print("Semantic Type of Fragment {} is not "\ + "unique: {}, {}".format(fid, + str(semantic_type), + str(counts))) + perm = counts.argmax() + semantic_type = semantic_type[perm] + else: + semantic_type = semantic_type[0] + + points = labels[mask][:, 1:4] + size = points.shape[0] + depositions = rescaled_input_charge[mask] + depositions_MeV = labels[mask][:, 4] + voxel_indices = np.where(mask)[0] + + group_id, counts = np.unique(labels[:, 6][mask].astype(int), return_counts=True) + if group_id.shape[0] > 1: + if verbose: + print("Group ID of Fragment {} is not "\ + "unique: {}, {}".format(fid, + str(group_id), + str(counts))) + perm = counts.argmax() + group_id = group_id[perm] + else: + group_id = group_id[0] + + interaction_id, counts = np.unique(labels[:, 7][mask].astype(int), return_counts=True) + if interaction_id.shape[0] > 1: + if verbose: + print("Interaction ID of Fragment {} is not "\ + "unique: {}, {}".format(fid, + str(interaction_id), + str(counts))) + perm = 
counts.argmax() + interaction_id = interaction_id[perm] + else: + interaction_id = interaction_id[0] + + + is_primary, counts = np.unique(labels[:, -2][mask].astype(bool), return_counts=True) + if is_primary.shape[0] > 1: + if verbose: + print("Primary label of Fragment {} is not "\ + "unique: {}, {}".format(fid, + str(is_primary), + str(counts))) + perm = counts.argmax() + is_primary = is_primary[perm] + else: + is_primary = is_primary[0] - part = TruthParticleFragment(points, fid, semantic_type, - interaction_id=interaction_id, - group_id=group_id, - image_id=entry, - voxel_indices=voxel_indices, - depositions=depositions, - depositions_MeV=depositions_MeV, - is_primary=is_primary, - alias='Fragment') + part = TruthParticleFragment(self._translate(points, volume), + fid, semantic_type, + interaction_id=interaction_id, + group_id=group_id, + image_id=entry, + voxel_indices=voxel_indices, + depositions=depositions, + depositions_MeV=depositions_MeV, + is_primary=is_primary, + alias='Fragment', + volume=volume) - fragments.append(part) + fragments.append(part) + out_fragments_list.extend(fragments) - return fragments + return out_fragments_list def get_true_particles(self, entry, only_primaries=True, - verbose=False) -> List[TruthParticle]: + verbose=False, volume=None) -> List[TruthParticle]: ''' Get list of instances for given batch id. @@ -918,217 +1100,298 @@ def get_true_particles(self, entry, only_primaries=True, id number p: true momentum vector ''' - labels = self.data_blob['cluster_label'][entry] - if self.deghosting: - labels_noghost = self.data_blob['cluster_label_noghost'][entry] - segment_label = self.data_blob['segment_label'][entry][:, -1] - particle_ids = set(list(np.unique(labels[:, 6]).astype(int))) - rescaled_input_charge = self.result['input_rescaled'][entry][:, 4] + self._check_volume(volume) - particles = [] - exclude_ids = set([]) + entries = self._get_entries(entry, volume) - for idx, p in enumerate(self.data_blob['particles_asis'][entry]): - pid = int(p.id()) - # 1. Check if current pid is one of the existing group ids - if pid not in particle_ids: - # print("PID {} not in particle_ids".format(pid)) - continue - is_primary = p.group_id() == p.parent_id() - if p.pdg_code() not in TYPE_LABELS: - # print("PID {} not in TYPE LABELS".format(pid)) - continue - # For deghosting inputs, perform voxel cut with true nonghost coords. - if self.deghosting: - exclude_ids = self._apply_true_voxel_cut(entry) - if pid in exclude_ids: - # Skip this particle if its below the voxel minimum requirement - # print("PID {} was excluded from the list of particles due"\ - # " to true nonghost voxel cut. Exclude IDS = {}".format( - # p.id(), str(exclude_ids) - # )) - continue + out_particles_list = [] + global_entry = entry + for entry in entries: + volume = entry % self._num_volumes - pdg = TYPE_LABELS[p.pdg_code()] - mask = labels[:, 6].astype(int) == pid + labels = self.data_blob['cluster_label'][entry] if self.deghosting: - mask_noghost = labels_noghost[:, 6].astype(int) == pid - # If particle is Michel electron, we have the option to - # only consider the primary ionization. - # Semantic labels only label the primary ionization as Michel. - # Cluster labels will have the entire Michel together. 
- if self.michel_primary_ionization_only and 2 in labels[mask][:, -1].astype(int): - mask = mask & (labels[:, -1].astype(int) == 2) + labels_noghost = self.data_blob['cluster_label_noghost'][entry] + segment_label = self.data_blob['segment_label'][entry][:, -1] + particle_ids = set(list(np.unique(labels[:, 6]).astype(int))) + rescaled_input_charge = self.result['input_rescaled'][entry][:, 4] + + particles = [] + exclude_ids = set([]) + + for idx, p in enumerate(self.data_blob['particles_asis'][global_entry]): + pid = int(p.id()) + # 1. Check if current pid is one of the existing group ids + if pid not in particle_ids: + # print("PID {} not in particle_ids".format(pid)) + continue + is_primary = p.group_id() == p.parent_id() + if p.pdg_code() not in TYPE_LABELS: + # print("PID {} not in TYPE LABELS".format(pid)) + continue + # For deghosting inputs, perform voxel cut with true nonghost coords. if self.deghosting: - mask_noghost = mask_noghost & (labels_noghost[:, -1].astype(int) == 2) - - # Check semantics - semantic_type, sem_counts = np.unique( - labels[mask][:, -1].astype(int), return_counts=True) - - if semantic_type.shape[0] > 1: - if verbose: - print("Semantic Type of Particle {} is not "\ - "unique: {}, {}".format(pid, - str(semantic_type), - str(sem_counts))) - perm = sem_counts.argmax() - semantic_type = semantic_type[perm] - else: - semantic_type = semantic_type[0] + exclude_ids = self._apply_true_voxel_cut(global_entry) + if pid in exclude_ids: + # Skip this particle if its below the voxel minimum requirement + # print("PID {} was excluded from the list of particles due"\ + # " to true nonghost voxel cut. Exclude IDS = {}".format( + # p.id(), str(exclude_ids) + # )) + continue + + pdg = TYPE_LABELS[p.pdg_code()] + mask = labels[:, 6].astype(int) == pid + if self.deghosting: + mask_noghost = labels_noghost[:, 6].astype(int) == pid + # If particle is Michel electron, we have the option to + # only consider the primary ionization. + # Semantic labels only label the primary ionization as Michel. + # Cluster labels will have the entire Michel together. 
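In other words, the cluster mask is narrowed to the voxels that the semantic labels call Michel (toy arrays, illustration only):

import numpy as np

semantic = np.array([1, 1, 2, 2, 4])   # 2 = Michel, labels only the primary ionization
cluster  = np.array([9, 9, 9, 9, 9])   # cluster labels keep the whole Michel together
mask = cluster == 9
if 2 in semantic[mask]:
    mask = mask & (semantic == 2)      # keep only the primary ionization voxels
print(np.where(mask)[0])               # [2 3]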
+ if self.michel_primary_ionization_only and 2 in labels[mask][:, -1].astype(int): + mask = mask & (labels[:, -1].astype(int) == 2) + if self.deghosting: + mask_noghost = mask_noghost & (labels_noghost[:, -1].astype(int) == 2) + + # Check semantics + semantic_type, sem_counts = np.unique( + labels[mask][:, -1].astype(int), return_counts=True) + + if semantic_type.shape[0] > 1: + if verbose: + print("Semantic Type of Particle {} is not "\ + "unique: {}, {}".format(pid, + str(semantic_type), + str(sem_counts))) + perm = sem_counts.argmax() + semantic_type = semantic_type[perm] + else: + semantic_type = semantic_type[0] - coords = self.data_blob['input_data'][entry][mask][:, 1:4] + coords = self.data_blob['input_data'][entry][mask][:, 1:4] - interaction_id, int_counts = np.unique(labels[mask][:, 7].astype(int), - return_counts=True) - if interaction_id.shape[0] > 1: - if verbose: - print("Interaction ID of Particle {} is not "\ - "unique: {}".format(pid, str(interaction_id))) - perm = int_counts.argmax() - interaction_id = interaction_id[perm] - else: - interaction_id = interaction_id[0] - - nu_id, nu_counts = np.unique(labels[mask][:, 8].astype(int), - return_counts=True) - if nu_id.shape[0] > 1: - if verbose: - print("Neutrino ID of Particle {} is not "\ - "unique: {}".format(pid, str(nu_id))) - perm = nu_counts.argmax() - nu_id = nu_id[perm] - else: - nu_id = nu_id[0] + interaction_id, int_counts = np.unique(labels[mask][:, 7].astype(int), + return_counts=True) + if interaction_id.shape[0] > 1: + if verbose: + print("Interaction ID of Particle {} is not "\ + "unique: {}".format(pid, str(interaction_id))) + perm = int_counts.argmax() + interaction_id = interaction_id[perm] + else: + interaction_id = interaction_id[0] + + nu_id, nu_counts = np.unique(labels[mask][:, 8].astype(int), + return_counts=True) + if nu_id.shape[0] > 1: + if verbose: + print("Neutrino ID of Particle {} is not "\ + "unique: {}".format(pid, str(nu_id))) + perm = nu_counts.argmax() + nu_id = nu_id[perm] + else: + nu_id = nu_id[0] - fragments = np.unique(labels[mask][:, 5].astype(int)) - depositions_MeV = labels[mask][:, 4] - depositions = rescaled_input_charge[mask] # Will be in ADC - coords_noghost, depositions_noghost = None, None - if self.deghosting: - coords_noghost = labels_noghost[mask_noghost][:, 1:4] - depositions_noghost = labels_noghost[mask_noghost][:, 4].squeeze() - - particle = TruthParticle(coords, pid, - semantic_type, interaction_id, pdg, - particle_asis=p, - batch_id=entry, - depositions=depositions, - is_primary=is_primary, - coords_noghost=coords_noghost, - depositions_noghost=depositions_noghost, - depositions_MeV=depositions_MeV) - - particle.p = np.array([p.px(), p.py(), p.pz()]) - particle.fragments = fragments - particle.particle_asis = p - particle.nu_id = nu_id - particle.voxel_indices = np.where(mask)[0] - - particle.startpoint = np.array([p.first_step().x(), - p.first_step().y(), - p.first_step().z()]) - - if semantic_type == 1: - particle.endpoint = np.array([p.last_step().x(), - p.last_step().y(), - p.last_step().z()]) - - if particle.voxel_indices.shape[0] >= self.min_particle_voxel_count: - particles.append(particle) + fragments = np.unique(labels[mask][:, 5].astype(int)) + depositions_MeV = labels[mask][:, 4] + depositions = rescaled_input_charge[mask] # Will be in ADC + coords_noghost, depositions_noghost = None, None + if self.deghosting: + coords_noghost = labels_noghost[mask_noghost][:, 1:4] + depositions_noghost = labels_noghost[mask_noghost][:, 4].squeeze() + + particle = 
TruthParticle(self._translate(coords, volume), + pid, + semantic_type, interaction_id, pdg, + particle_asis=p, + batch_id=entry, + depositions=depositions, + is_primary=is_primary, + coords_noghost=coords_noghost, + depositions_noghost=depositions_noghost, + depositions_MeV=depositions_MeV, + volume=entry % self._num_volumes) + + particle.p = np.array([p.px(), p.py(), p.pz()]) + particle.fragments = fragments + particle.particle_asis = p + particle.nu_id = nu_id + particle.voxel_indices = np.where(mask)[0] + + particle.startpoint = np.array([p.first_step().x(), + p.first_step().y(), + p.first_step().z()]) + + if semantic_type == 1: + particle.endpoint = np.array([p.last_step().x(), + p.last_step().y(), + p.last_step().z()]) + + if particle.voxel_indices.shape[0] >= self.min_particle_voxel_count: + particles.append(particle) + + out_particles_list.extend(particles) if only_primaries: - particles = [p for p in particles if p.is_primary] + out_particles_list = [p for p in out_particles_list if p.is_primary] - return particles + return out_particles_list def get_true_interactions(self, entry, drop_nonprimary_particles=True, - min_particle_voxel_count=-1) -> List[Interaction]: + min_particle_voxel_count=-1, + volume=None) -> List[Interaction]: + self._check_volume(volume) if min_particle_voxel_count < 0: min_particle_voxel_count = self.min_particle_voxel_count - true_particles = self.get_true_particles(entry, only_primaries=drop_nonprimary_particles) - out = group_particles_to_interactions_fn(true_particles, - get_nu_id=True, mode='truth') - vertices = self.get_true_vertices(entry) - for ia in out: - ia.vertex = vertices[ia.id] - return out + entries = self._get_entries(entry, volume) + out_interactions_list = [] + for e in entries: + volume = e % self._num_volumes + true_particles = self.get_true_particles(entry, only_primaries=drop_nonprimary_particles, volume=volume) + out = group_particles_to_interactions_fn(true_particles, + get_nu_id=True, mode='truth') + vertices = self.get_true_vertices(entry, volume=volume) + for ia in out: + ia.vertex = vertices[ia.id] + ia.volume = volume + out_interactions_list.extend(out) + + return out_interactions_list + + def get_true_vertices(self, entry, volume=None): + """ + Parameters + ========== + entry: int + volume: int, default None + + Returns + ======= + dict + Keys are true interactions ids, values are np.array of shape (N, 3) + with true vertices coordinates. 
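Typical usage of this accessor, with a hypothetical `evaluator` standing in for an already-constructed FullChainEvaluator:

# `evaluator`: assumed FullChainEvaluator instance, built from data_blob/result/cfg as usual
vertices = evaluator.get_true_vertices(entry=0)             # all volumes of entry 0
vtx_vol0 = evaluator.get_true_vertices(entry=0, volume=0)   # restrict to volume 0
for inter_id, vtx in vertices.items():
    print(inter_id, vtx)   # true interaction ID -> vertex coordinates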
+ """ + self._check_volume(volume) - def get_true_vertices(self, entry): - inter_idxs = np.unique( - self.data_blob['cluster_label'][entry][:, 7].astype(int)) + entries = self._get_entries(entry, volume) out = {} - for inter_idx in inter_idxs: - if inter_idx < 0: - continue - vtx = get_vertex(self.data_blob['kinematics_label'], - self.data_blob['cluster_label'], - data_idx=entry, - inter_idx=inter_idx) - out[inter_idx] = vtx + for entry in entries: + volume = entry % self._num_volumes + inter_idxs = np.unique( + self.data_blob['cluster_label'][entry][:, 7].astype(int)) + for inter_idx in inter_idxs: + if inter_idx < 0: + continue + vtx = get_vertex(self.data_blob['kinematics_label'], + self.data_blob['cluster_label'], + data_idx=entry, + inter_idx=inter_idx) + out[inter_idx] = self._translate(vtx, volume) + return out def match_particles(self, entry, only_primaries=False, - mode='pred_to_true', **kwargs): + mode='pred_to_true', + volume=None, **kwargs): ''' Returns (, None) if no match was found + + Parameters + ========== + entry: int + only_primaries: bool, default False + mode: str, default 'pred_to_true' + Must be either 'pred_to_true' or 'true_to_pred' + volume: int, default None ''' - if mode == 'pred_to_true': - # Match each pred to one in true - particles_from = self.get_particles(entry, only_primaries=only_primaries) - particles_to = self.get_true_particles(entry, only_primaries=only_primaries) - elif mode == 'true_to_pred': - # Match each true to one in pred - particles_to = self.get_particles(entry, only_primaries=only_primaries) - particles_from = self.get_true_particles(entry, only_primaries=only_primaries) - else: - raise ValueError("Mode {} is not valid. For matching each"\ - " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) - matched_pairs, _, _ = match_particles_fn(particles_from, particles_to, - min_overlap=self.min_overlap_count, - overlap_mode=self.overlap_mode, - **kwargs) - return matched_pairs + self._check_volume(volume) + + entries = self._get_entries(entry, volume) + all_matches = [] + for e in entries: + volume = e % self._num_volumes + if mode == 'pred_to_true': + # Match each pred to one in true + particles_from = self.get_particles(entry, only_primaries=only_primaries, volume=volume) + particles_to = self.get_true_particles(entry, only_primaries=only_primaries, volume=volume) + elif mode == 'true_to_pred': + # Match each true to one in pred + particles_to = self.get_particles(entry, only_primaries=only_primaries, volume=volume) + particles_from = self.get_true_particles(entry, only_primaries=only_primaries, volume=volume) + else: + raise ValueError("Mode {} is not valid. 
For matching each"\ + " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) + matched_pairs, _, _ = match_particles_fn(particles_from, particles_to, + min_overlap=self.min_overlap_count, + overlap_mode=self.overlap_mode, + **kwargs) + all_matches.extend(matched_pairs) + return all_matches def match_interactions(self, entry, mode='pred_to_true', drop_nonprimary_particles=True, match_particles=True, - return_counts=False, **kwargs): - if mode == 'pred_to_true': - ints_from = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles) - ints_to = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles) - elif mode == 'true_to_pred': - ints_to = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles) - ints_from = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles) - else: - raise ValueError("Mode {} is not valid. For matching each"\ - " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) + return_counts=False, + volume=None, **kwargs): + """ + Parameters + ========== + entry: int + mode: str, default 'pred_to_true' + Must be either 'pred_to_true' or 'true_to_pred'. + drop_nonprimary_particles: bool, default True + match_particles: bool, default True + return_counts: bool, default False + volume: int, default None - matched_interactions, _, counts = match_interactions_fn(ints_from, ints_to, + Returns + ======= + List[Tuple[Interaction, Interaction]] + List of tuples, indicating the matched interactions. + """ + self._check_volume(volume) + + entries = self._get_entries(entry, volume) + all_matches, all_counts = [], [] + for e in entries: + volume = e % self._num_volumes + if mode == 'pred_to_true': + ints_from = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) + ints_to = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) + elif mode == 'true_to_pred': + ints_to = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) + ints_from = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) + else: + raise ValueError("Mode {} is not valid. 
For matching each"\ + " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) + + matched_interactions, _, counts = match_interactions_fn(ints_from, ints_to, + min_overlap=self.min_overlap_count, + **kwargs) + + if match_particles: + for interactions in matched_interactions: + domain, codomain = interactions + if codomain is None: + domain_particles, codomain_particles = domain.particles, [] + else: + domain_particles, codomain_particles = domain.particles, codomain.particles + # continue + matched_particles, _, _ = match_particles_fn(domain_particles, codomain_particles, min_overlap=self.min_overlap_count, - **kwargs) - - if match_particles: - for interactions in matched_interactions: - domain, codomain = interactions - if codomain is None: - domain_particles, codomain_particles = domain.particles, [] - else: - domain_particles, codomain_particles = domain.particles, codomain.particles - # continue - matched_particles, _, _ = match_particles_fn(domain_particles, codomain_particles, - min_overlap=self.min_overlap_count, - overlap_mode=self.overlap_mode) + overlap_mode=self.overlap_mode) + all_matches.extend(matched_interactions) + all_counts.extend(counts) if return_counts: - return matched_interactions, counts + return all_matches, all_counts else: - return matched_interactions + return all_matches diff --git a/mlreco/iotools/collates.py b/mlreco/iotools/collates.py index 9809411a..24306037 100644 --- a/mlreco/iotools/collates.py +++ b/mlreco/iotools/collates.py @@ -62,6 +62,73 @@ def __init__(self, definitions): continue self.boundaries[i].sort() # Ascending order + n_boundaries = [len(self.boundaries[n]) if self.boundaries[n] is not None else 0 for n in range(self.dim)] + # Generate indices that describe all volumes + all_index = [] + for n in range(self.dim): + all_index.append(np.arange(n_boundaries[n]+1)) + self.combo = np.array(np.meshgrid(*tuple(all_index))).T.reshape(-1, self.dim) + + # Generate coordinate shifts for each volume + # List of list (1st dim is spatial dimension, 2nd is volume splits in a given spatial dimension) + shifts = [] + for n in range(self.dim): + if self.boundaries[n] is None: + shifts.append([0.]) + continue + dim_shifts = [] + for i in range(len(self.boundaries[n])): + dim_shifts.append(self.boundaries[n][i-1] if i > 0 else 0.) + dim_shifts.append(self.boundaries[n][-1]) + shifts.append(dim_shifts) + self.shifts = shifts + + def num_volumes(self): + """ + Returns + ======= + int + """ + return len(self.combo) + + def virtual_batch_ids(self, entry=0): + """ + Parameters + ========== + entry: int, optional + Which entry of the dataset you are trying to access. + + Returns + ======= + list + List of virtual batch ids that correspond to this entry. + """ + return np.arange(len(self.combo)) + entry * self.num_volumes() + + def translate(self, voxels, volume): + """ + Meant to reverse what the split method does: for voxels coordinates initially in the range of volume 0, + translate to the range of a specific volume given in argument. + + Parameters + ========== + voxels: np.ndarray + Expected shape is (D_0, ..., D_N, self.dim) with N >=0. In other words, voxels can be a list of + coordinate or a single coordinate with shape (d,). + volume: int + + Returns + ======= + np.ndarray + Translated voxels array, using internally computed shifts. 
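+
+        Example
+        =======
+        A minimal sketch, assuming the boundaries define a single split of the
+        first coordinate at 700 (two volumes, with volume 1 shifted by +700) and
+        `vb` is a VolumeBoundaries instance built with that configuration:
+
+        .. code-block:: python
+
+            voxels = np.array([[10, 20, 30]])
+            vb.translate(voxels, volume=0)  # unchanged, volume 0 is the reference
+            vb.translate(voxels, volume=1)  # -> array([[710, 20, 30]])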
+ """ + assert volume >= 0 and volume < self.num_volumes() + assert voxels.shape[-1] == self.dim + + new_voxels = voxels.copy() + for n in range(self.dim): + new_voxels[..., n] += int(self.shifts[n][self.combo[volume][n]]) + return new_voxels def split(self, voxels): """ @@ -79,45 +146,35 @@ def split(self, voxels): This is a permutation mask which can be used to apply the lexsort to both the new voxels and the features or data tensor (which is not passed to this function). """ + assert len(voxels.shape) == 2 + batch_ids = voxels[:, 0] coords = voxels[:, 1:] - assert len(coords.shape) == 2 assert self.dim == coords.shape[1] - all_boundaries, shifts = [], [] - n_boundaries =[] + # This will contain the list of boolean masks corresponding to each boundary + # in each spatial dimension (so, list of list) + all_boundaries = [] for n in range(self.dim): if self.boundaries[n] is None: all_boundaries.append([np.ones((coords.shape[0],), dtype=bool)]) - shifts.append([0.]) - n_boundaries.append(0) continue dim_boundaries = [] - dim_shifts = [] for i in range(len(self.boundaries[n])): dim_boundaries.append( coords[:, n] < self.boundaries[n][i] ) - dim_shifts.append(self.boundaries[n][i-1] if i > 0 else 0.) dim_boundaries.append( coords[:, n] >= self.boundaries[n][-1] ) - dim_shifts.append(self.boundaries[n][-1]) all_boundaries.append(dim_boundaries) - shifts.append(dim_shifts) - n_boundaries.append(len(self.boundaries[n])) - - #n_volumes = np.prod([len(x) for x in all_boundaries]) - # Generate indices - all_index = [] - for n in range(self.dim): - all_index.append(np.arange(n_boundaries[n]+1)) - combo = np.array(np.meshgrid(*tuple(all_index))).T.reshape(-1, self.dim) virtual_batch_ids = np.zeros((coords.shape[0],), dtype=np.int32) new_coords = coords.copy() - for idx, c in enumerate(combo): - m = all_boundaries[0][c[0]] + for idx, c in enumerate(self.combo): # Looping over volumes + m = all_boundaries[0][c[0]] # Building a boolean mask for this volume for n in range(1, self.dim): m = np.logical_and(m, all_boundaries[n][c[n]]) - virtual_batch_ids[m] = idx + # Now defining virtual batch id + # We need to take into account original batch id + virtual_batch_ids[m] = idx + batch_ids[m] * self.num_volumes() for n in range(self.dim): - new_coords[m, n] -= int(shifts[n][c[n]]) + new_coords[m, n] -= int(self.shifts[n][c[n]]) new_voxels = np.concatenate([virtual_batch_ids[:, None], new_coords], axis=1) perm = np.lexsort(new_voxels.T[list(range(1, self.dim+1)) + [0], :]) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index bf26c796..1a0af553 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -186,6 +186,7 @@ def run_fragment_gnns(self, result, input): frag_dict = self.get_all_fragments(result, input) fragments = frag_dict['frags'] frag_seg = frag_dict['frag_seg'] + print('run fragment gnns', np.unique(frag_seg, return_counts=True), len(fragments)) if self.enable_gnn_shower: @@ -217,6 +218,7 @@ def run_fragment_gnns(self, result, input): fragments[em_mask], output_keys, kwargs) + print('run fragment gnns', len(result['shower_fragments'][0][0]), len(fragments[em_mask]), len(fragments)) if self.enable_gnn_track: diff --git a/mlreco/utils/cluster/fragmenter.py b/mlreco/utils/cluster/fragmenter.py index 4d38dc26..537834fa 100644 --- a/mlreco/utils/cluster/fragmenter.py +++ b/mlreco/utils/cluster/fragmenter.py @@ -23,6 +23,7 @@ def format_fragments(fragments, frag_batch_ids, frag_seg, 
batch_column, batch_si dtype=object if not same_length else np.int64) frag_batch_ids_np = np.array(frag_batch_ids) frag_seg_np = np.array(frag_seg) + print('format fragments', np.unique(frag_batch_ids_np, return_counts=True), np.unique(frag_seg_np, return_counts=True)) batches, counts = torch.unique(batch_column, return_counts=True) # In case one of the events is "missing" and len(counts) < batch_size diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index cbb8c45a..1c91eecc 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -208,6 +208,7 @@ def _get_extra_gnn_features(fragments, for c in classes: mask |= (frag_seg == c) mask = np.where(mask)[0] + print('mask', np.unique(frag_seg, return_counts=True), frag_seg.shape, len(mask), classes) #print("INPUT = ", input) From 308ccd2fd0b89fea47ecc76df1378c0365e5dcfb Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 1 Nov 2022 13:17:11 -0700 Subject: [PATCH 31/52] Temporarly add parsers which compute rescaled charge on the fly. Will be removed later --- mlreco/iotools/parsers/__init__.py | 2 ++ mlreco/iotools/parsers/cluster.py | 21 +++++++++++++++++++++ mlreco/iotools/parsers/sparse.py | 16 ++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/mlreco/iotools/parsers/__init__.py b/mlreco/iotools/parsers/__init__.py index a32f7fb8..f63dc3e4 100644 --- a/mlreco/iotools/parsers/__init__.py +++ b/mlreco/iotools/parsers/__init__.py @@ -75,6 +75,7 @@ parse_sparse2d, parse_sparse3d, parse_sparse3d_ghost, + parse_sparse3d_charge_rescaled, # TEMPORARY parse_sparse2d_scn, # Deprecated parse_sparse3d_scn # Depreacted ) @@ -82,6 +83,7 @@ from mlreco.iotools.parsers.cluster import ( parse_cluster2d, parse_cluster3d, + parse_cluster3d_charge_rescaled, # TEMPORARY parse_cluster3d_kinematics_clean, # Deprecated parse_cluster3d_clean_full # Depreacted ) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py index 2e954ba6..abc44f05 100644 --- a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -192,6 +192,27 @@ def parse_cluster3d(cluster_event, return np_voxels, np_features +def parse_cluster3d_charge_rescaled(cluster_event, + particle_event = None, + particle_mpv_event = None, + sparse_semantics_event = None, + sparse_value_event_list = None, + add_particle_info = False, + add_kinematics_info = False, + clean_data = True, + precedence = [1,2,0,3,4], + type_include_mpr = False): + # Produces cluster3d labels with sparse3d_reco_rescaled on the fly on datasets that do not have it + np_voxels, np_features = parse_cluster3d(cluster_event, particle_event, particle_mpv_event, sparse_semantics_event, None, + add_particle_info, add_kinematics_info, clean_data, precedence, type_include_mpr) + + from .sparse import parse_sparse3d_charge_rescaled + _, val_features = parse_sparse3d_charge_rescaled(sparse_value_event_list) + np_features[:,0] = val_features[:,-1] + + return np_voxels, np_features + + def parse_cluster3d_clean_full(cluster_event, particle_event, particle_mpv_event=None, sparse_semantics_event=None): from warnings import warn warn("Deprecated: parse_cluster3d_clean_full deprecated, use parse_cluster3d instead", DeprecationWarning) diff --git a/mlreco/iotools/parsers/sparse.py b/mlreco/iotools/parsers/sparse.py index 01a18f9d..b50e2872 100644 --- a/mlreco/iotools/parsers/sparse.py +++ b/mlreco/iotools/parsers/sparse.py @@ -131,6 +131,22 @@ def parse_sparse3d_ghost(sparse_event_semantics): return np_voxels, (np_data==5).astype(np.float32) +def 
parse_sparse3d_charge_rescaled(sparse_event_list): + # Produces sparse3d_reco_rescaled on the fly on datasets that do not have it + np_voxels, output = parse_sparse3d(sparse_event_list) + + deghost = output[:, -1] < 5 + hit_charges = output[deghost, :3] + hit_ids = output[deghost, 3:6] + pmask = hit_ids > -1 + + _, inverse, counts = np.unique(hit_ids, return_inverse=True, return_counts=True) + multiplicity = counts[inverse].reshape(-1,3) + charges = np.sum((hit_charges*pmask)/multiplicity, axis=1)/np.sum(pmask, axis=1) + + return np_voxels[deghost], charges.reshape(-1,1) + + def parse_sparse2d_scn(sparse_event_list): from warnings import warn warn("Deprecated: parse_sparse2d_scn deprecated, use parse_sparse2d instead", DeprecationWarning) From 5b4e4ad8905bd188389c353ed15546f33fa6b842 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 1 Nov 2022 16:23:53 -0700 Subject: [PATCH 32/52] Bug fix in training curve drawing tool --- mlreco/visualization/training.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlreco/visualization/training.py b/mlreco/visualization/training.py index 879f5674..b61b5770 100644 --- a/mlreco/visualization/training.py +++ b/mlreco/visualization/training.py @@ -197,7 +197,6 @@ def draw_training_curves(log_dir, models, metrics, legend=dict(font=dict(size=20))) if len(models) == 1 and same_plot: layout['legend']['title'] = model_names[models[0]] if models[0] in model_names else models[0] - # If there is >1 subplot, prepare the canvas if not same_plot: @@ -233,13 +232,13 @@ def draw_training_curves(log_dir, models, metrics, val_dfs[key] = get_validation_df(log_subdir, metrics, val_prefix) colors[key] = plotly_colors[i] - # Draw the requested metrics + # Loop over the requested metrics for i, metric_list in enumerate(metrics): - # Draw the training curves - metric, metric_name = find_key(dfs[key], metric_list) + # Get a graph per training campaign for j, key in enumerate(dfs.keys()): # Get the necessary data epoch_train = dfs[key]['epoch'][:max_iter:step] + metric, metric_name = find_key(dfs[key], metric_list) metric_train = dfs[key][metric][:max_iter:step] if smoothing == 1 else dfs[key][metric][:max_iter].rolling(smoothing, min_periods=1, center=True).mean()[::step] draw_val = bool(len(val_dfs[key]['iter'])) if draw_val: @@ -290,7 +289,6 @@ def draw_training_curves(log_dir, models, metrics, else: plt.xlabel('Epochs') ylabel = metric_names[metrics[0]] if metrics[0] in metric_names else metrics[0] - print(ylabel) plt.ylabel(ylabel if len(metrics) == 1 else 'Metric') plt.gca().set_ylim(limits) legend_title = model_names[models[0]] if models[0] in model_names else models[0] From 6cbcad70fc4cf365523cac221c82b4a35cd2b30d Mon Sep 17 00:00:00 2001 From: Temigo Date: Wed, 2 Nov 2022 12:04:31 -0700 Subject: [PATCH 33/52] Remove debugging print statements + small fix in VolumeBoundaries --- mlreco/iotools/collates.py | 3 ++- mlreco/models/layers/common/gnn_full_chain.py | 2 -- mlreco/utils/cluster/fragmenter.py | 1 - mlreco/utils/gnn/data.py | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/mlreco/iotools/collates.py b/mlreco/iotools/collates.py index 24306037..ee040b03 100644 --- a/mlreco/iotools/collates.py +++ b/mlreco/iotools/collates.py @@ -56,10 +56,11 @@ def __init__(self, definitions): # Quick sanity check for i in range(self.dim): - assert self.boundaries[i] == 'None' or (isinstance(self.boundaries[i], list) and len(self.boundaries[i]) > 0) + assert self.boundaries[i] == 'None' or self.boundaries[i] is None or 
(isinstance(self.boundaries[i], list) and len(self.boundaries[i]) > 0) if self.boundaries[i] == 'None': self.boundaries[i] = None continue + if self.boundaries[i] is None: continue self.boundaries[i].sort() # Ascending order n_boundaries = [len(self.boundaries[n]) if self.boundaries[n] is not None else 0 for n in range(self.dim)] diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 1a0af553..bf26c796 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -186,7 +186,6 @@ def run_fragment_gnns(self, result, input): frag_dict = self.get_all_fragments(result, input) fragments = frag_dict['frags'] frag_seg = frag_dict['frag_seg'] - print('run fragment gnns', np.unique(frag_seg, return_counts=True), len(fragments)) if self.enable_gnn_shower: @@ -218,7 +217,6 @@ def run_fragment_gnns(self, result, input): fragments[em_mask], output_keys, kwargs) - print('run fragment gnns', len(result['shower_fragments'][0][0]), len(fragments[em_mask]), len(fragments)) if self.enable_gnn_track: diff --git a/mlreco/utils/cluster/fragmenter.py b/mlreco/utils/cluster/fragmenter.py index 537834fa..4d38dc26 100644 --- a/mlreco/utils/cluster/fragmenter.py +++ b/mlreco/utils/cluster/fragmenter.py @@ -23,7 +23,6 @@ def format_fragments(fragments, frag_batch_ids, frag_seg, batch_column, batch_si dtype=object if not same_length else np.int64) frag_batch_ids_np = np.array(frag_batch_ids) frag_seg_np = np.array(frag_seg) - print('format fragments', np.unique(frag_batch_ids_np, return_counts=True), np.unique(frag_seg_np, return_counts=True)) batches, counts = torch.unique(batch_column, return_counts=True) # In case one of the events is "missing" and len(counts) < batch_size diff --git a/mlreco/utils/gnn/data.py b/mlreco/utils/gnn/data.py index 1c91eecc..cbb8c45a 100644 --- a/mlreco/utils/gnn/data.py +++ b/mlreco/utils/gnn/data.py @@ -208,7 +208,6 @@ def _get_extra_gnn_features(fragments, for c in classes: mask |= (frag_seg == c) mask = np.where(mask)[0] - print('mask', np.unique(frag_seg, return_counts=True), frag_seg.shape, len(mask), classes) #print("INPUT = ", input) From 52510f3cd6c451511b1f1c04606cdb2d3712e853 Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 3 Nov 2022 16:45:29 -0700 Subject: [PATCH 34/52] FlashManager + fix get_nu_id + parse_opflash --- analysis/classes/FlashManager.py | 206 +++++++++++++++++++++++++++++ analysis/classes/Interaction.py | 7 + analysis/classes/__init__.py | 1 + mlreco/iotools/parsers/__init__.py | 3 +- mlreco/iotools/parsers/misc.py | 22 +++ mlreco/utils/groups.py | 10 +- 6 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 analysis/classes/FlashManager.py diff --git a/analysis/classes/FlashManager.py b/analysis/classes/FlashManager.py new file mode 100644 index 00000000..2ddf6326 --- /dev/null +++ b/analysis/classes/FlashManager.py @@ -0,0 +1,206 @@ +import os, sys + + +class FlashManager: + def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None): + + # Setup OpT0finder + basedir = os.getenv('FMATCH_BASEDIR') + if basedir is None: + raise Exception("You need to source OpT0Finder configure.sh first, or set the FMATCH_BASEDIR environment variable.") + + sys.path.append(os.path.join(basedir, 'python')) + #print(os.getenv('LD_LIBRARY_PATH'), os.getenv('ROOT_INCLUDE_PATH')) + os.environ['LD_LIBRARY_PATH'] = "%s:%s" % (os.path.join(basedir, 'build/lib'), os.environ['LD_LIBRARY_PATH']) + #os.environ['ROOT_INCLUDE_PATH'] = os.path.join(basedir, 
'build/include') + #print(os.environ['LD_LIBRARY_PATH'], os.environ['ROOT_INCLUDE_PATH']) + if 'FMATCH_DATADIR' not in os.environ: # needed for loading detector specs + os.environ['FMATCH_DATADIR'] = os.path.join(basedir, 'dat') + import ROOT + + import flashmatch + from flashmatch.visualization import plotly_layout3d, plot_track, plot_flash, plot_qcluster + from flashmatch import flashmatch, geoalgo + + # Setup meta + self.cfg = cfg + + self.min_x, self.min_y, self.min_z = None, None, None + self.size_voxel_x, self.size_voxel_y, self.size_voxel_z = None, None, None + if meta is not None: + self.min_x = meta[0] + self.min_y = meta[1] + self.min_z = meta[2] + self.size_voxel_x = meta[6] + self.size_voxel_y = meta[7] + self.size_voxel_z = meta[8] + + # Setup flash matching + print('Setting up OpT0Finder for flash matching...') + self.mgr = flashmatch.FlashMatchManager() + cfg = flashmatch.CreatePSetFromFile(cfg_fmatch) + if detector_specs is None: + self.det = flashmatch.DetectorSpecs.GetME(os.path.join(basedir, 'dat/detector_specs.cfg')) + else: + assert isinstance(detector_specs, str) + if not os.path.exists(detector_specs): + raise Exception("Detector specs file not found") + + self.det = flashmatch.DetectorSpecs.GetME(detector_specs) + self.mgr.Configure(cfg) + print('...done.') + + self.all_matches = None + self.pmt_v, self.tpc_v = None, None + + def get_flash(self, flash_id, array=False): + from flashmatch import flashmatch + + if self.pmt_v is None: + raise Exception("self.pmt_v is None") + + for flash in self.pmt_v: + if flash.idx != flash_id: continue + if array: return flashmatch.as_ndarray(flash) + else: return flash + + raise Exception("Flash %d does not exist in self.pmt_v" % flash_id) + + def get_qcluster(self, tpc_id, array=False): + from flashmatch import flashmatch + + if self.tpc_v is None: + raise Exception("self.tpc_v is None") + + for tpc in self.tpc_v: + if tpc.idx != tpc_id: continue + if array: return flashmatch.as_ndarray(tpc) + else: return tpc + + raise Exception("TPC object %d does not exist in self.tpc_v" % tpc_id) + + def make_qcluster(self, interactions): + from flashmatch import flashmatch + + if self.min_x is None: + raise Exception('min_x is None') + + tpc_v = [] + for p in interactions: + qcluster = flashmatch.QCluster_t() + qcluster.idx = int(p.id) # Assign a unique index + qcluster.time = 0 # assumed time w.r.t. 
trigger for reconstruction + for i in range(p.size): + # Create a geoalgo::QPoint_t + qpoint = flashmatch.QPoint_t( + p.points[i, 0] * self.size_voxel_x + self.min_x, + p.points[i, 1] * self.size_voxel_y + self.min_y, + p.points[i, 2] * self.size_voxel_z + self.min_z, + p.depositions[i]) + # Add it to geoalgo::QCluster_t + qcluster.push_back(qpoint) + tpc_v.append(qcluster) + + if self.tpc_v is not None: + print("Warning: overwriting internal list of particles.") + self.tpc_v = tpc_v + return tpc_v + + def make_flash(self, larcv_flashes): + """ + Parameters + ========== + larcv_flashes: list of list of larcv::Flash + + Returns + ======= + list of flashmatch::Flash_t + """ + from flashmatch import flashmatch + + flashes = [] + for branch in larcv_flashes: + flashes.extend(branch) + + pmt_v = [] + for idx, f in enumerate(flashes): + # f is an object of type larcv::Flash + flash = flashmatch.Flash_t() + flash.idx = f.id() # Assign a unique index + flash.time = f.time() # Flash timing, a candidate T0 + + # Assign the flash position and error on this position + flash.x, flash.y, flash.z = 0, 0, 0 + flash.x_err, flash.y_err, flash.z_err = 0, 0, 0 + + # PE distribution over the 360 photodetectors + #flash.pe_v = f.PEPerOpDet() + #for i in range(360): + offset = 0 if len(f.PEPerOpDet()) == 180 else 180 + for i in range(180): + flash.pe_v.push_back(f.PEPerOpDet()[i + offset]) + flash.pe_err_v.push_back(0.) + pmt_v.append(flash) + if self.pmt_v is not None: + print("Warning: overwriting internal list of flashes.") + self.pmt_v = pmt_v + return pmt_v + + def run_flash_matching(self, flashes=None, interactions=None): + if self.tpc_v is None: + if interactions is None: + raise Exception('You need to specify `interactions`, or to run make_qcluster.') + if interactions is not None: + self.make_qcluster(interactions) + + + if self.pmt_v is None: + if flashes is None: + raise Exception("PMT objects need to be defined. 
Either specify `flashes`, or run make_flash.") + if flashes is not None: + self.make_flash(flashes) + + assert self.tpc_v is not None and self.pmt_v is not None + + self.mgr.Reset() + + # First register all objects in manager + for x in self.tpc_v: + self.mgr.Add(x) + for x in self.pmt_v: + self.mgr.Add(x) + + # Run the matching + if self.all_matches is not None: + print("Warning: overwriting internal list of matches.") + self.all_matches = self.mgr.Match() + return self.all_matches + + def get_match(self, idx, matches=None): + if matches is None: + if self.all_matches is None: + raise Exception("Need to run flash matching first with run_flash_matching.") + matches = self.all_matches + + for m in self.all_matches: + if self.tpc_v[m.tpc_id].idx != idx: continue + return m + + return None + + def get_matched_flash(self, idx, matches=None): + m = self.get_match(idx, matches=matches) + if m is None: return None + + flash_id = m.flash_id + if flash_id is None: return None + + if flash_id > len(self.pmt_v): + raise Exception("Could not find flash id %d in self.pmt_v" % flash_id) + + return self.pmt_v[flash_id] + + + def get_t0(self, idx, matches=None): + flash = self.get_matched_flash(idx, matches=matches) + return None if flash is None else flash.time diff --git a/analysis/classes/Interaction.py b/analysis/classes/Interaction.py index a22a8f77..3e85df18 100644 --- a/analysis/classes/Interaction.py +++ b/analysis/classes/Interaction.py @@ -35,10 +35,17 @@ def __init__(self, interaction_id, particles, vertex=None, nu_id=-1, volume=0): # Voxel indices of an interaction is defined by the union of # constituent particle voxel indices self.voxel_indices = [] + self.points = [] + self.depositions = [] for p in self.particles: self.voxel_indices.append(p.voxel_indices) + self.points.append(p.points) + self.depositions.append(p.depositions) assert p.interaction_id == interaction_id self.voxel_indices = np.hstack(self.voxel_indices) + self.points = np.concatenate(self.points, axis=0) + self.depositions = np.hstack(self.depositions) + self.size = self.voxel_indices.shape[0] self.num_particles = len(self.particles) diff --git a/analysis/classes/__init__.py b/analysis/classes/__init__.py index 6902e665..c4fb0f0f 100644 --- a/analysis/classes/__init__.py +++ b/analysis/classes/__init__.py @@ -4,3 +4,4 @@ from .TruthParticleFragment import TruthParticleFragment from .Interaction import Interaction from .TruthInteraction import TruthInteraction +from .FlashManager import FlashManager diff --git a/mlreco/iotools/parsers/__init__.py b/mlreco/iotools/parsers/__init__.py index a32f7fb8..00e13ab2 100644 --- a/mlreco/iotools/parsers/__init__.py +++ b/mlreco/iotools/parsers/__init__.py @@ -103,5 +103,6 @@ from mlreco.iotools.parsers.misc import ( parse_meta2d, parse_meta3d, - parse_run_info + parse_run_info, + parse_opflash ) diff --git a/mlreco/iotools/parsers/misc.py b/mlreco/iotools/parsers/misc.py index 4918d138..e8964148 100644 --- a/mlreco/iotools/parsers/misc.py +++ b/mlreco/iotools/parsers/misc.py @@ -122,3 +122,25 @@ def parse_run_info(sparse_event): (run, subrun, event) """ return sparse_event.run(), sparse_event.subrun(), sparse_event.event() + + +def parse_opflash(opflash_event): + """ + Copy construct OpFlash and return an array of larcv::Flash. + + .. 
code-block:: yaml + schema: + opflash_cryoE: + parser:parse_opflash + opflash_event: opflash_cryoE + + Configuration + ------------- + opflash_event: larcv::EventFlash + + Returns + ------- + list + """ + opflashes = [larcv.Flash(f) for f in opflash_event.as_vector()] + return opflashes diff --git a/mlreco/utils/groups.py b/mlreco/utils/groups.py index a945fb03..bd998a8a 100644 --- a/mlreco/utils/groups.py +++ b/mlreco/utils/groups.py @@ -278,10 +278,16 @@ def get_nu_id(cluster_event, particle_v, interaction_ids, particle_mpv=None): else: # Find mpv particles is_mpv = np.zeros((len(particle_v),)) - mpv_ids = [p.id() for p in particle_mpv] + # mpv_ids = [p.id() for p in particle_mpv] + mpv_pdg = np.array([p.pdg_code() for p in particle_mpv]) + mpv_energy = np.array([p.energy_init() for p in particle_mpv]) for idx, part in enumerate(particle_v): # track_id - 1 in `particle_pcluster_tree` corresponds to id (or track_id) in `particle_mpv_tree` - if (part.track_id()-1) in mpv_ids or (part.ancestor_track_id()-1) in mpv_ids: + # if (part.track_id()-1) in mpv_ids or (part.ancestor_track_id()-1) in mpv_ids: + # FIXME the above was wrong I think. + close = np.isclose(part.energy_init()*1e-3, mpv_energy) + pdg = part.pdg_code() == mpv_pdg + if close.any() and pdg.any() and (np.where(close)[0] == np.where(pdg)[0]).any(): is_mpv[idx] = 1. # else: # print("fake cosmic", part.pdg_code(), part.shape(), part.creation_process(), part.track_id(), part.ancestor_track_id(), mpv_ids) From 535f88dcc3df869b282ec634b70c81ce7d981b06 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 3 Nov 2022 17:49:40 -0700 Subject: [PATCH 35/52] Added option in GrapPA to do edge length selection based on semantic type --- mlreco/models/grappa.py | 49 +++++++++---- mlreco/utils/gnn/cluster.py | 54 +++++++++++++-- mlreco/utils/gnn/network.py | 132 +++++++++++++++++++----------------- mlreco/utils/numba.py | 56 +++++++-------- 4 files changed, 179 insertions(+), 112 deletions(-) diff --git a/mlreco/models/grappa.py b/mlreco/models/grappa.py index d6a3ed7c..38d8b74f 100644 --- a/mlreco/models/grappa.py +++ b/mlreco/models/grappa.py @@ -8,8 +8,8 @@ from mlreco.models.layers.gnn import gnn_model_construct, node_encoder_construct, edge_encoder_construct, node_loss_construct, edge_loss_construct from mlreco.utils.gnn.data import merge_batch, split_clusts, split_edge_index -from mlreco.utils.gnn.cluster import form_clusters, get_cluster_batch, get_cluster_label, get_cluster_points_label, get_cluster_directions, get_cluster_dedxs -from mlreco.utils.gnn.network import complete_graph, delaunay_graph, mst_graph, bipartite_graph, inter_cluster_distance, knn_graph +from mlreco.utils.gnn.cluster import form_clusters, get_cluster_batch, get_cluster_label, get_cluster_primary_label, get_cluster_points_label, get_cluster_directions, get_cluster_dedxs +from mlreco.utils.gnn.network import complete_graph, delaunay_graph, mst_graph, bipartite_graph, inter_cluster_distance, knn_graph, restrict_graph class GNN(torch.nn.Module): """ @@ -150,9 +150,18 @@ def __init__(self, cfg, name='grappa', batch_col=0, coords_col=(1, 4)): # Choose what type of network to use self.network = base_config.get('network', 'complete') self.edge_max_dist = base_config.get('edge_max_dist', -1) - self.edge_dist_metric = base_config.get('edge_dist_metric', 'set') + self.edge_dist_metric = base_config.get('edge_dist_metric', 'voxel') self.edge_knn_k = base_config.get('edge_knn_k', 5) + # Turn the edge_max_dist value into a matrix + if not isinstance(self.edge_max_dist, 
list): self.edge_max_dist = [self.edge_max_dist] + mat_size = int((np.sqrt(8*len(self.edge_max_dist)+1)-1)/2) + max_dist_mat = np.zeros((mat_size, mat_size), dtype=float) + max_dist_mat[np.triu_indices(mat_size)] = self.edge_max_dist + max_dist_mat += max_dist_mat.T - np.diag(np.diag(max_dist_mat)) + self.edge_max_dist = max_dist_mat + print('edge_max_dist matrix', self.edge_max_dist) + # If requested, merge images together within the batch self.merge_batch = base_config.get('merge_batch', False) self.merge_batch_mode = base_config.get('merge_batch_mode', 'const') @@ -300,26 +309,25 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, # If necessary, compute the cluster distance matrix dist_mat = None - if self.edge_max_dist > 0 or self.network == 'mst' or self.network == 'knn': - dist_mat = inter_cluster_distance(cluster_data[:,self.coords_index[0]:self.coords_index[1]], clusts, batch_ids, self.edge_dist_metric) + if np.any(self.edge_max_dist > -1) or self.network == 'mst' or self.network == 'knn': + dist_mat = inter_cluster_distance(cluster_data[:,self.coords_index[0]:self.coords_index[1]].float(), clusts, batch_ids, self.edge_dist_metric) # Form the requested network if len(clusts) == 1: edge_index = np.empty((2,0), dtype=np.int64) elif self.network == 'complete': - edge_index = complete_graph(batch_ids, dist_mat, self.edge_max_dist) + edge_index = complete_graph(batch_ids) elif self.network == 'delaunay': import numba as nb - edge_index = delaunay_graph(cluster_data.cpu().numpy(), nb.typed.List(clusts), batch_ids, dist_mat, self.edge_max_dist, - batch_col=self.batch_index, coords_col=self.coords_index) + edge_index = delaunay_graph(cluster_data.cpu().numpy(), nb.typed.List(clusts), batch_ids, self.batch_index, self.coords_index) elif self.network == 'mst': - edge_index = mst_graph(batch_ids, dist_mat, self.edge_max_dist) + edge_index = mst_graph(batch_ids, dist_mat) elif self.network == 'knn': edge_index = knn_graph(batch_ids, self.edge_knn_k, dist_mat) elif self.network == 'bipartite': clust_ids = get_cluster_label(cluster_data, clusts, self.source_col) group_ids = get_cluster_label(cluster_data, clusts, self.target_col) - edge_index = bipartite_graph(batch_ids, clust_ids==group_ids, dist_mat, self.edge_max_dist) + edge_index = bipartite_graph(batch_ids, clust_ids==group_ids, dist_mat) else: raise ValueError('Network type not recognized: '+self.network) @@ -328,6 +336,15 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, mask = groups[edge_index[0]] == groups[edge_index[1]] edge_index = edge_index[:,mask] + # Restrict the input graph based on edge distance, if requested + if np.any(self.edge_max_dist > -1): + if self.edge_max_dist.shape[0] == 1: + edge_index = restrict_graph(edge_index, dist_mat, self.edge_max_dist) + else: + # Here get_cluster_primary_label is used to ensure that Michel/Delta showers are given the appropriate semantic label + classes = extra_feats[:,-1].cpu().numpy().astype(int) if extra_feats is not None else get_cluster_primary_label(cluster_data, clusts, -1).astype(int) + edge_index = restrict_graph(edge_index, dist_mat, self.edge_max_dist, classes) + # Update result with a list of edges for each batch id edge_index_split, ebids = split_edge_index(edge_index, batch_ids, batches) result['edge_index'] = [edge_index_split] @@ -346,11 +363,15 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, points = get_cluster_points_label(cluster_data, particles, clusts, 
coords_index=self.coords_index) x = torch.cat([x, points.float()], dim=1) if self.add_start_dir: - dirs = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,:3], clusts, self.start_dir_max_dist, self.start_dir_opt) - x = torch.cat([x, dirs.float()], dim=1) + dirs_start = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,:3], clusts, self.start_dir_max_dist, self.start_dir_opt) + dirs_end = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,3:6], clusts, self.start_dir_max_dist, self.start_dir_opt) + #x = torch.cat([x, dirs_start.float(), dirs_end.float()], dim=1) + x = torch.cat([x, dirs_start.float()], dim=1) if self.add_start_dedx: - dedxs = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.start_dir_max_dist) - x = torch.cat([x, dedxs.reshape(-1,1).float()], dim=1) + dedxs_start = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.start_dir_max_dist) + dedxs_end = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,3:6], clusts, self.start_dir_max_dist) + #x = torch.cat([x, dedxs_start.reshape(-1,1).float(), dedxs_end.reshape(-1,1).float()], dim=1) + x = torch.cat([x, dedxs_start.reshape(-1,1).float()], dim=1) # Bring edge_index and batch_ids to device index = torch.tensor(edge_index, device=cluster_data.device, dtype=torch.long) diff --git a/mlreco/utils/gnn/cluster.py b/mlreco/utils/gnn/cluster.py index 7136f14d..d933ce93 100644 --- a/mlreco/utils/gnn/cluster.py +++ b/mlreco/utils/gnn/cluster.py @@ -123,10 +123,9 @@ def get_cluster_label(data, clusts, column=5): Args: data (np.ndarray) : (N,8) [x, y, z, batchid, value, id, groupid, shape] clusts ([np.ndarray]): (C) List of arrays of voxel IDs in each cluster - column (int) : Column which specifies the cluster ID - dtype (dtype) + column (int) : Column which specifies the cluster label Returns: - np.ndarray: (C) List of cluster IDs + np.ndarray: (C) List of cluster labels """ return _get_cluster_label(data, clusts, column) @@ -141,6 +140,47 @@ def _get_cluster_label(data: nb.float64[:,:], labels[i] = v[np.argmax(np.array(cts))] return labels + +@numba_wrapper(cast_args=['data'], list_args=['clusts']) +def get_cluster_primary_label(data, clusts, column, cluster_column=5, group_column=6): + """ + Function that returns the majority label of the primary component + of a cluster, as specified in the requested data column. + + The primary component is identified by picking the set of label + voxels that have a cluster_id identical to the cluster group_id. 
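+    If a cluster contains no voxel whose cluster_id matches its group_id,
+    the majority label of the whole cluster is used as a fallback.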
+ + Args: + data (np.ndarray) : (N,8) [x, y, z, batchid, value, id, groupid, shape] + clusts ([np.ndarray]): (C) List of arrays of voxel IDs in each cluster + column (int) : Column which specifies the cluster label + cluster_column (int) : Column which specifies the cluster ID + group_column (int) : Column which specifies the cluster group ID + Returns: + np.ndarray: (C) List of cluster primary labels + """ + return _get_cluster_primary_label(data, clusts, column, cluster_column, group_column) + +@nb.njit(cache=True) +def _get_cluster_primary_label(data: nb.float64[:,:], + clusts: nb.types.List(nb.int64[:]), + column: nb.int64, + cluster_column: nb.int64 = 5, + group_column: nb.int64 = 6) -> nb.float64[:]: + labels = np.empty(len(clusts), dtype=data.dtype) + group_ids = _get_cluster_label(data, clusts, group_column) + for i in range(len(clusts)): + cluster_ids = data[clusts[i], cluster_column] + primary_mask = cluster_ids == group_ids[i] + if len(data[clusts[i][primary_mask]]): + v, cts = unique_nb(data[clusts[i][primary_mask], column]) + else: # If the primary is empty, use group + v, cts = unique_nb(data[clusts[i], column]) + labels[i] = v[np.argmax(np.array(cts))] + + return labels + + @numba_wrapper(cast_args=['data'], list_args=['clusts'], keep_torch=True, ref_arg='data') def get_momenta_label(data, clusts, column=8): """ @@ -350,13 +390,13 @@ def get_cluster_points_label(data, particles, clusts, random_order=True, batch_c and end points of tracks if track. Args: - data (torch.tensor) : (N,6) Voxel coordinates [x, y, z, batch_id, value, clust_id, group_id] - particles (torch.tensor): (N,9) Point coordinates [start_x, start_y, start_z, batch_id, last_x, last_y, last_z, start_t, shape_id] + data (torch.tensor) : (N,X) Voxel coordinates [batch_id, x, y, z, ...] + particles (torch.tensor): (N,9) Point coordinates [batch_id, start_x, start_y, start_z, last_x, last_y, last_z, start_t, shape_id] (obtained with parse_particle_coords) clusts ([np.ndarray]) : (C) List of arrays of voxel IDs in each cluster random_order (bool) : Whether or not to shuffle the start and end points randomly Returns: - np.ndarray: (N,3/6) particle wise start (and end points in RANDOMIZED ORDER) + np.ndarray: (N,6) cluster-wise start and end points (in RANDOMIZED ORDER by default) """ return _get_cluster_points_label(data, particles, clusts, random_order, batch_col=batch_col, @@ -528,7 +568,7 @@ def cluster_direction(voxels: nb.float64[:,:], Args: voxels (torch.tensor): (N,3) Voxel coordinates [x, y, z] - starts (torch.tensor): (C,3) Coordinates of the start points + start (torch.tensor) : (C,3) Coordinates of the start point max_dist (float) : Max distance between start voxel and other voxels optimize (bool) : Optimizes the number of points involved in the estimate Returns: diff --git a/mlreco/utils/gnn/network.py b/mlreco/utils/gnn/network.py index 0f50b2f8..11466c49 100644 --- a/mlreco/utils/gnn/network.py +++ b/mlreco/utils/gnn/network.py @@ -30,16 +30,14 @@ def loop_graph(n: nb.int64) -> nb.int64[:,:]: @nb.njit(cache=True) def complete_graph(batch_ids: nb.int64[:], - dist_mat: nb.float64[:,:] = None, - max_dist: nb.float64 = -1.) -> nb.int64[:,:]: + directed: bool = False) -> nb.int64[:,:]: """ Function that returns an incidence matrix of a complete graph that connects every node with ever other node. 
Args: batch_ids (np.ndarray): (C) List of batch ids - dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances - max_dist (double) : Maximal edge length + directed (bool) : If directed, only keep edges [i,j] for which j>=i Returns: np.ndarray: (2,E) Tensor of edges """ @@ -60,16 +58,9 @@ def complete_graph(batch_ids: nb.int64[:], ret[k] = [i,j] k += 1 - # If requested, remove the edges above a certain length threshold - if max_dist > -1: - assert dist_mat is not None - dists = np.empty(len(ret), dtype=dist_mat.dtype) - for k, e in enumerate(ret): - dists[k] = dist_mat[e[0],e[1]] - ret = ret[dists < max_dist] - - # Add the reciprocal edges as to create an undirected graph - ret = np.vstack((ret, ret[:,::-1])) + # Add the reciprocal edges as to create an undirected graph, if requested + if not directed: + ret = np.vstack((ret, ret[:,::-1])) return ret.T @@ -77,8 +68,7 @@ def complete_graph(batch_ids: nb.int64[:], def delaunay_graph(data: nb.float64[:,:], clusts: nb.types.List(nb.int64[:]), batch_ids: nb.int64[:], - dist_mat: nb.float64[:,:] = None, - max_dist: nb.float64 = -1., + directed: bool = False, batch_col: nb.int64 = 0, coords_col: nb.types.List(nb.int64[:]) = (1, 4)) -> nb.int64[:,:]: """ @@ -89,8 +79,7 @@ def delaunay_graph(data: nb.float64[:,:], data (np.ndarray) : (N,4) [x, y, z, batchid] clusts ([np.ndarray]) : (C) List of arrays of voxel IDs in each cluster batch_ids (np.ndarray): (C) List of batch ids - dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances - max_dist (double) : Maximal edge length + directed (bool) : If directed, only keep edges [i,j] for which j>=i Returns: np.ndarray: (2,E) Tensor of edges """ @@ -115,24 +104,17 @@ def delaunay_graph(data: nb.float64[:,:], edges = np.vstack((clust_ids[edges[0]],clust_ids[edges[1]])).T ret = np.vstack((ret, edges)) - # If requested, remove the edges above a certain length threshold - if max_dist > -1: - assert dist_mat is not None - dists = np.empty(len(ret), dtype=dist_mat.dtype) - for k, e in enumerate(ret): - dists[k] = dist_mat[e[0],e[1]] - ret = ret[dists < max_dist] - - # Add the reciprocal edges as to create an undirected graph - ret = np.vstack((ret, ret[:,::-1])) + # Add the reciprocal edges as to create an undirected graph, if requested + if not directed: + ret = np.vstack((ret, ret[:,::-1])) return ret.T @nb.njit(cache=True) def mst_graph(batch_ids: nb.int64[:], - dist_mat: nb.float64[:,:] = None, - max_dist: nb.float64 = -1.) -> nb.int64[:,:]: + dist_mat: nb.float64[:,:], + directed: bool = False) -> nb.int64[:,:]: """ Function that returns an incidence matrix that connects nodes that share an edge in their corresponding Euclidean Minimum Spanning Tree (MST). 
@@ -140,7 +122,7 @@ def mst_graph(batch_ids: nb.int64[:], Args: batch_ids (np.ndarray): (C) List of batch ids dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances - max_dist (double) : Maximal edge length + directed (bool) : If directed, only keep edges [i,j] for which j>=i Returns: np.ndarray: (2,E) Tensor of edges """ @@ -156,16 +138,9 @@ def mst_graph(batch_ids: nb.int64[:], edges = np.vstack((clust_ids[edges[0]],clust_ids[edges[1]])).T ret = np.vstack((ret, edges)) - # If requested, remove the edges above a certain length threshold - if max_dist > -1: - assert dist_mat is not None - dists = np.empty(len(ret), dtype=dist_mat.dtype) - for k, e in enumerate(ret): - dists[k] = dist_mat[e[0],e[1]] - ret = ret[dists < max_dist] - - # Add the reciprocal edges as to create an undirected graph - ret = np.vstack((ret, ret[:,::-1])) + # Add the reciprocal edges as to create an undirected graph, if requested + if not directed: + ret = np.vstack((ret, ret[:,::-1])) return ret.T @@ -173,7 +148,8 @@ def mst_graph(batch_ids: nb.int64[:], @nb.njit(cache=True) def knn_graph(batch_ids: nb.int64[:], k: nb.int64, - dist_mat: nb.float64[:,:] = None) -> nb.int64[:,:]: + dist_mat: nb.float64[:,:], + directed: bool = False) -> nb.int64[:,:]: """ Function that returns an incidence matrix that connects nodes that are k nearest neighbors. Sorts the distance matrix. @@ -182,6 +158,7 @@ def knn_graph(batch_ids: nb.int64[:], batch_ids (np.ndarray): (C) List of batch ids k (int) : Number of connected neighbors for each node dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances + directed (bool) : If directed, only keep edges [i,j] for which j>=i Returns: np.ndarray: (2,E) Tensor of edges """ @@ -200,8 +177,9 @@ def knn_graph(batch_ids: nb.int64[:], if len(edges): ret = np.vstack((ret, edges)) - # Add the reciprocal edges as to create an undirected graph - ret = np.vstack((ret, ret[:,::-1])) + # Add the reciprocal edges as to create an undirected graph, if requested + if not directed: + ret = np.vstack((ret, ret[:,::-1])) return ret.T @@ -209,8 +187,6 @@ def knn_graph(batch_ids: nb.int64[:], @nb.njit(cache=True) def bipartite_graph(batch_ids: nb.int64[:], primaries: nb.boolean[:], - dist_mat: nb.float64[:,:] = None, - max_dist: nb.float64 = -1, directed: nb.boolean = True, directed_to: str = 'secondary') -> nb.int64[:,:]: """ @@ -220,8 +196,8 @@ def bipartite_graph(batch_ids: nb.int64[:], Args: batch_ids (np.ndarray): (C) List of batch ids primaries (np.ndarray): (C) Primary mask (True if primary) - dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances - max_dist (double) : Maximal edge length + directed (bool) : True if edges only exist in one direction + directed_to (str) : Whether to point the edges to the primaries or the secondaries Returns: np.ndarray: (2,E) Tensor of edges """ @@ -232,14 +208,6 @@ def bipartite_graph(batch_ids: nb.int64[:], if batch_ids[i] == batch_ids[j]: ret = np.vstack((ret, np.array([[i,j]]))) - # If requested, remove the edges above a certain length threshold - if max_dist > -1: - assert dist_mat is not None - dists = np.empty(len(ret), dtype=dist_mat.dtype) - for k, e in enumerate(ret): - dists[k] = dist_mat[e[0],e[1]] - ret = ret[dists < max_dist] - # Handle directedness, by default graph is directed towards secondaries if directed: if directed_to == 'primary': @@ -252,6 +220,44 @@ def bipartite_graph(batch_ids: nb.int64[:], return ret.T +@nb.njit(cache=True) +def restrict_graph(edge_index: nb.int64[:,:], + dist_mat: nb.float64[:,:], + max_dist: 
nb.float64[:,:], + classes: nb.int64[:] = None) -> nb.int64[:,:]: + """ + Function that restricts an incidence matrix of a graph + to the edges below a certain length. + + If `classes` are specified, the maximum edge length must be provided + for each possible combination of node classes. + + Args: + edge_index (np.ndarray): (2,E) Tensor of edges + dist_mat (np.ndarray) : (C,C) Tensor of pair-wise cluster distances + max_dist (np.ndarray) : (N_c, N_c) Maximum edge length for each class type + classes (np.ndarray) : (C) List of class for each cluster in the graph + Returns: + np.ndarray: (2,E) Restricted tensor of edges + """ + if classes is None: + assert max_dist.shape[0] == max_dist.shape[1] == 1 + max_dist = max_dist[0][0] + edge_dists = np.empty(edge_index.shape[1], dtype=dist_mat.dtype) + for k in range(edge_index.shape[1]): + i, j = edge_index[0,k], edge_index[1,k] + edge_dists[k] = dist_mat[i, j] + return edge_index[:, edge_dists < max_dist] + else: + edge_max_dists = np.empty(edge_index.shape[1], dtype=dist_mat.dtype) + edge_dists = np.empty(edge_index.shape[1], dtype=dist_mat.dtype) + for k in range(edge_index.shape[1]): + i, j = edge_index[0,k], edge_index[1,k] + edge_max_dists[k] = max_dist[classes[i], classes[j]] + edge_dists[k] = dist_mat[i, j] + return edge_index[:, edge_dists < edge_max_dists] + + @numba_wrapper(cast_args=['data'], list_args=['clusts'], keep_torch=True, ref_arg='data') def get_cluster_edge_features(data, clusts, edge_index, batch_col=0, coords_col=(1, 4)): """ @@ -312,7 +318,7 @@ def _get_cluster_edge_features_vec(data: nb.float32[:,:], coords_col: nb.types.List(nb.int64[:]) = (1, 4)) -> nb.float32[:,:]: # Get the closest points of approach IDs for each edge - lend, idxs1, idxs2 = _get_edge_distances(data[:,:3], clusts, edge_index) + lend, idxs1, idxs2 = _get_edge_distances(data[:,coords_col[0]:coords_col[1]], clusts, edge_index) # Get the points that correspond to the first voxels v1 = data[idxs1, coords_col[0]:coords_col[1]] @@ -380,7 +386,7 @@ def _get_voxel_edge_features(data: nb.float32[:,:], return feats -@numba_wrapper(cast_args=['voxels'], list_args='clusts') +@numba_wrapper(cast_args=['voxels'], list_args=['clusts']) def get_edge_distances(voxels, clusts, edge_index): """ For each edge, finds the closest points of approach (CPAs) between the @@ -441,27 +447,25 @@ def inter_cluster_distance(voxels, clusts, batch_ids=None, mode='voxel'): return _inter_cluster_distance(voxels, clusts, batch_ids, mode) @nb.njit(parallel=True, cache=True) -def _inter_cluster_distance(voxels: nb.float64[:,:], +def _inter_cluster_distance(voxels: nb.float32[:,:], clusts: nb.types.List(nb.int64[:]), batch_ids: nb.int64[:], mode: str = 'voxel') -> nb.float64[:,:]: assert len(clusts) == len(batch_ids) dist_mat = np.zeros((len(batch_ids), len(batch_ids)), dtype=voxels.dtype) - indxi, indxj = np.triu_indices(len(batch_ids), 1) + indxi, indxj = complete_graph(batch_ids, directed=True) if mode == 'voxel': for k in nb.prange(len(indxi)): i, j = indxi[k], indxj[k] - if batch_ids[i] == batch_ids[j]: - dist_mat[i,j] = dist_mat[j,i] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) + dist_mat[i,j] = dist_mat[j,i] = np.min(cdist_nb(voxels[clusts[i]], voxels[clusts[j]])) elif mode == 'centroid': centroids = np.empty((len(batch_ids), voxels.shape[1]), dtype=voxels.dtype) for i in nb.prange(len(batch_ids)): centroids[i] = mean_nb(voxels[clusts[i]], axis=0) for k in nb.prange(len(indxi)): i, j = indxi[k], indxj[k] - if batch_ids[i] == batch_ids[j]: - dist_mat[i,j] = dist_mat[j,i] = 
np.sqrt(np.sum((centroids[j]-centroids[i])**2)) + dist_mat[i,j] = dist_mat[j,i] = np.sqrt(np.sum((centroids[j]-centroids[i])**2)) else: raise ValueError('Inter-cluster distance mode not supported') diff --git a/mlreco/utils/numba.py b/mlreco/utils/numba.py index a4aafede..d5b6e60e 100644 --- a/mlreco/utils/numba.py +++ b/mlreco/utils/numba.py @@ -10,8 +10,10 @@ def numba_wrapper(cast_args=[], list_args=[], keep_torch=False, ref_arg=None): to make the relevant conversions to numpy where necessary. Args: - type_arg (str) : Argument name which determines the data type and device location - list_args ([str]) : List of arguments which need to be cast to a numba list + cast_args ([str]): List of arguments to be cast to numpy + list_args ([str]): List of arguments which need to be cast to a numba typed list + keep_torch (bool): Make the output a torch object, if the reference argument is one + ref_arg (str) : Reference argument used to assign a type and device to the torch output Returns: Function ''' @@ -63,7 +65,7 @@ def inner(*args, **kwargs): @nb.njit(cache=True) -def unique_nb(x: nb.int64[:]) -> (nb.int64[:], nb.int64[:]): +def unique_nb(x: nb.int32[:]) -> (nb.int32[:], nb.int32[:]): b = np.sort(x.flatten()) unique = list(b[:1]) counts = [1 for _ in unique] @@ -77,9 +79,9 @@ def unique_nb(x: nb.int64[:]) -> (nb.int64[:], nb.int64[:]): @nb.njit(cache=True) -def submatrix_nb(x:nb.float64[:,:], - index1: nb.int64[:], - index2: nb.int64[:]) -> nb.float64[:,:]: +def submatrix_nb(x:nb.float32[:,:], + index1: nb.int32[:], + index2: nb.int32[:]) -> nb.float32[:,:]: """ Numba implementation of matrix subsampling """ @@ -91,8 +93,8 @@ def submatrix_nb(x:nb.float64[:,:], @nb.njit(cache=True) -def cdist_nb(x1: nb.float64[:,:], - x2: nb.float64[:,:]) -> nb.float64[:,:]: +def cdist_nb(x1: nb.float32[:,:], + x2: nb.float32[:,:]) -> nb.float32[:,:]: """ Numba implementation of Eucleadian cdist in 3D. 
""" @@ -104,8 +106,8 @@ def cdist_nb(x1: nb.float64[:,:], @nb.njit(cache=True) -def mean_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.float64[:]: +def mean_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.float32[:]: """ Numba implementation of np.mean(x, axis) """ @@ -121,13 +123,13 @@ def mean_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def argmin_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.int64[:]: +def argmin_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.int32[:]: """ Numba implementation of np.argmin(x, axis) """ assert axis == 0 or axis == 1 - argmin = np.empty(x.shape[1-axis], dtype=np.int64) + argmin = np.empty(x.shape[1-axis], dtype=np.int32) if axis == 0: for i in range(len(argmin)): argmin[i] = np.argmin(x[:,i]) @@ -138,13 +140,13 @@ def argmin_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def argmax_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.int64[:]: +def argmax_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.int32[:]: """ Numba implementation of np.argmax(x, axis) """ assert axis == 0 or axis == 1 - argmax = np.empty(x.shape[1-axis], dtype=np.int64) + argmax = np.empty(x.shape[1-axis], dtype=np.int32) if axis == 0: for i in range(len(argmax)): argmax[i] = np.argmax(x[:,i]) @@ -155,13 +157,13 @@ def argmax_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def min_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.float64[:]: +def min_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.float32[:]: """ Numba implementation of np.max(x, axis) """ assert axis == 0 or axis == 1 - xmin = np.empty(x.shape[1-axis], dtype=np.int64) + xmin = np.empty(x.shape[1-axis], dtype=np.int32) if axis == 0: for i in range(len(xmin)): xmin[i] = np.min(x[:,i]) @@ -172,13 +174,13 @@ def min_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def max_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.float64[:]: +def max_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.float32[:]: """ Numba implementation of np.max(x, axis) """ assert axis == 0 or axis == 1 - xmax = np.empty(x.shape[1-axis], dtype=np.int64) + xmax = np.empty(x.shape[1-axis], dtype=np.int32) if axis == 0: for i in range(len(xmax)): xmax[i] = np.max(x[:,i]) @@ -189,8 +191,8 @@ def max_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def all_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.int64[:]: +def all_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.int32[:]: """ Numba implementation of np.all(x, axis) """ @@ -206,8 +208,8 @@ def all_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def softmax_nb(x: nb.float64[:,:], - axis: nb.int64) -> nb.float64[:,:]: +def softmax_nb(x: nb.float32[:,:], + axis: nb.int32) -> nb.float32[:,:]: assert axis == 0 or axis == 1 if axis == 0: xmax = max_nb(x, axis=0) @@ -220,7 +222,7 @@ def softmax_nb(x: nb.float64[:,:], @nb.njit(cache=True) -def log_loss_nb(x1: nb.boolean[:], x2: nb.float64[:]) -> nb.float64: +def log_loss_nb(x1: nb.boolean[:], x2: nb.float32[:]) -> nb.float32: if len(x1) > 0: return -(np.sum(np.log(x2[x1])) + np.sum(np.log(1.-x2[~x1])))/len(x1) else: From 4d77afa31025b6bbacf034fd1429aa6dd8dddc36 Mon Sep 17 00:00:00 2001 From: Temigo Date: Fri, 4 Nov 2022 13:02:58 -0700 Subject: [PATCH 36/52] Connect FlashManager with Predictor --- analysis/classes/ui.py | 53 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index 1f0d0158..31f5b57c 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -1,6 +1,7 @@ from typing import Callable, Tuple, List import numpy as np import pandas as pd +import os, sys from 
mlreco.utils.cluster.cluster_graph_constructor import ClusterGraphConstructor from mlreco.utils.ppn import uresnet_ppn_type_point_selector @@ -9,7 +10,7 @@ from scipy.special import softmax from analysis.classes import Particle, ParticleFragment, TruthParticleFragment, \ - TruthParticle, Interaction, TruthInteraction + TruthParticle, Interaction, TruthInteraction, FlashManager from analysis.classes.particle import matrix_counts, matrix_iou, \ match_particles_fn, match_interactions_fn, group_particles_to_interactions_fn from analysis.algorithms.point_matching import * @@ -54,7 +55,8 @@ class FullChainPredictor: 4) Does not support deghosting at the moment. (TODO) ''' - def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False): + def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, + enable_flash_matching=False, flash_matching_cfg="", opflash_keys=[]): self.module_config = cfg['model']['modules'] self.cfg = cfg @@ -114,11 +116,58 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False): self.vb = None self._num_volumes = 1 + # Prepare flash matching if requested + self.enable_flash_matching = enable_flash_matching + self.fm = None + if enable_flash_matching: + if 'meta' not in self.data_blob: + raise Exception('Meta unspecified in data_blob. Please add it to your I/O schema.') + #if 'FMATCH_BASEDIR' not in os.environ: + # raise Exception('FMATCH_BASEDIR undefined. Please source `OpT0Finder/configure.sh` or define it manually.') + assert os.path.exists(flash_matching_cfg) + assert len(opflash_keys) == self._num_volumes + + self.fm = FlashManager(cfg, flash_matching_cfg, meta=self.data_blob['meta'][0]) + self.opflash_keys = opflash_keys + + self.flash_matches = {} # key is volume, value is tuple (tpc_v, pmt_v, list of matches) + # type is (list of Interaction/TruthInteraction, list of larcv::Flash, list of flashmatch::FlashMatch_t) + + def __repr__(self): msg = "FullChainEvaluator(num_images={})".format(int(self.num_images/self._num_volumes)) return msg + def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): + if volume not in self.flash_matches: + self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume) + + tpc_v, pmt_v, matches = self.flash_matches[volume] + return [(tpc_v[m.tpc_id], pmt_v[m.flash_id], m) for m in matches] + def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): + if use_true_tpc_objects: + if not hasattr(self, 'get_true_interactions'): + raise Exception('This Predictor does not know about truth info.') + + tpc_v = self.get_true_interactions(entry, drop_nonprimary_particles=False, volume=volume) + else: + tpc_v = self.get_interactions(entry, drop_nonprimary_particles=False, volume=volume) + + input_tpc_v = self.fm.make_qcluster(tpc_v) + + selected_opflash_keys = self.opflash_keys + if volume is not None: + assert isinstance(volume, int) + selected_opflash_keys = [self.opflash_keys[volume]] + pmt_v = [] + for key in selected_opflash_keys: + pmt_v.extend(self.data_blob[key][entry]) + input_pmt_v = self.fm.make_flash([self.data_blob[key][entry] for key in selected_opflash_keys]) + + matches = self.fm.run_flash_matching() + self.flash_matches[volume] = (tpc_v, pmt_v, matches) + def _fit_predict_ppn(self, entry): ''' Method for predicting ppn predictions. 
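
A minimal usage sketch of the predictor-side interface added in this patch -- illustrative only, not part of the diff. The configuration path, the opflash keys and the `data_blob`/`result`/`cfg` objects are assumptions (the path and keys mirror values used later in this series), and the I/O configuration is assumed to define two detector volumes so that one opflash key exists per volume:

    import os
    from analysis.classes.ui import FullChainPredictor

    # data_blob, result and cfg are assumed to come from a full-chain forward pass;
    # FMATCH_BASEDIR must point to a local OpT0Finder installation.
    predictor = FullChainPredictor(
        data_blob, result, cfg,
        enable_flash_matching=True,
        flash_matching_cfg=os.path.join(os.environ['FMATCH_BASEDIR'], 'dat/flashmatch_112022.cfg'),
        opflash_keys=['opflash_cryoE', 'opflash_cryoW'])

    # Matching runs lazily on the first call and is cached in predictor.flash_matches;
    # each returned element is a (Interaction, larcv::Flash, flashmatch::FlashMatch_t) triplet.
    for interaction, flash, match in predictor.get_flash_matches(0, use_true_tpc_objects=False, volume=0):
        print(interaction.id, flash.time(), match.tpc_id, match.flash_id)
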
From ab70713aed6df00dff45408b850ab913165f09ce Mon Sep 17 00:00:00 2001 From: Temigo Date: Fri, 4 Nov 2022 13:36:59 -0700 Subject: [PATCH 37/52] Add some documentation to flash matching interface --- analysis/classes/FlashManager.py | 76 ++++++++++++++++++++++++++++++++ analysis/classes/ui.py | 24 ++++++++++ 2 files changed, 100 insertions(+) diff --git a/analysis/classes/FlashManager.py b/analysis/classes/FlashManager.py index 2ddf6326..4daf4a06 100644 --- a/analysis/classes/FlashManager.py +++ b/analysis/classes/FlashManager.py @@ -2,7 +2,32 @@ class FlashManager: + """ + Meant as an interface to OpT0finder, likelihood-based flash matching. + + See https://github.com/drinkingkazu/OpT0Finder for more details about it. + """ def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None): + """ + Expects that the environment variable `FMATCH_BASEDIR` is set. + You can either set it by hand (to the path where one can find + OpT0Finder) or you can source `OpT0Finder/configure.sh` if you + are running code from a command line. + + Parameters + ========== + cfg: dict + The full chain config. + cfg_fmatch: str + Path to config for OpT0Finder. + meta: np.ndarray, optional, default is None + Used to shift coordinates of interactions to "real" detector + coordinates for QCluster_t. + detector_specs: str, optional + Path to `detector_specs.cfg` file which defines some geometry + information about the detector PMT system. By default will look + into `OpT0Finder/dat/detector_specs.cfg`. + """ # Setup OpT0finder basedir = os.getenv('FMATCH_BASEDIR') @@ -80,6 +105,23 @@ def get_qcluster(self, tpc_id, array=False): raise Exception("TPC object %d does not exist in self.tpc_v" % tpc_id) def make_qcluster(self, interactions): + """ + Make flashmatch::QCluster_t objects from list of interactions. + + Note that coordinates of `interactions` are in voxel coordinates, + but inside this function we shift back to real detector coordinates + using meta information. flashmatch::QCluster_t objects are in + real cm coordinates. + + Parameters + ========== + interactions: list of Interaction/TruthInteraction + (Predicted or true) interaction objects. + + Returns + ======= + list of flashmatch::QCluster_t + """ from flashmatch import flashmatch if self.min_x is None: @@ -177,6 +219,17 @@ def run_flash_matching(self, flashes=None, interactions=None): return self.all_matches def get_match(self, idx, matches=None): + """ + Parameters + ========== + idx: int + Index of TPC object for which we want to retrieve a match. + matches: list of flashmatch::FlashMatch_t, optional, default is None + + Returns + ======= + flashmatch::FlashMatch_t + """ if matches is None: if self.all_matches is None: raise Exception("Need to run flash matching first with run_flash_matching.") @@ -189,6 +242,17 @@ def get_match(self, idx, matches=None): return None def get_matched_flash(self, idx, matches=None): + """ + Parameters + ========== + idx: int + Index of TPC object for which we want to retrieve a match. + matches: list of flashmatch::FlashMatch_t, optional, default is None + + Returns + ======= + flashmatch::Flash_t + """ m = self.get_match(idx, matches=matches) if m is None: return None @@ -202,5 +266,17 @@ def get_matched_flash(self, idx, matches=None): def get_t0(self, idx, matches=None): + """ + Parameters + ========== + idx: int + Index of TPC object for which we want to retrieve a match. 
+ matches: list of flashmatch::FlashMatch_t, optional, default is None + + Returns + ======= + float + Time in us with respect to simulation time reference. + """ flash = self.get_matched_flash(idx, matches=matches) return None if flash is None else flash.time diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index 31f5b57c..d181d35a 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -139,6 +139,22 @@ def __repr__(self): return msg def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): + """ + If flash matches has not yet been computed for this volume, then it will + be run as part of this function. Otherwise, flash matching results are + cached in `self.flash_matches` per volume. + + Parameters + ========== + entry: int + use_true_tpc_objects: bool, default is False + Whether to use true or predicted interactions. + volume: int, default is None + + Returns + ======= + list of tuple (Interaction, larcv::Flash, flashmatch::FlashMatch_t) + """ if volume not in self.flash_matches: self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume) @@ -146,6 +162,14 @@ def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): return [(tpc_v[m.tpc_id], pmt_v[m.flash_id], m) for m in matches] def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): + """ + Parameters + ========== + entry: int + use_true_tpc_objects: bool, default is False + Whether to use true or predicted interactions. + volume: int, default is None + """ if use_true_tpc_objects: if not hasattr(self, 'get_true_interactions'): raise Exception('This Predictor does not know about truth info.') From b8c887343de1324342105554361b38d8449ced18 Mon Sep 17 00:00:00 2001 From: Temigo Date: Tue, 8 Nov 2022 10:04:12 -0800 Subject: [PATCH 38/52] Fix bug in get_nu_id + fix bug in get_flash_matches --- analysis/algorithms/selections/example_nue.py | 11 +++++++++-- analysis/classes/ui.py | 6 +++--- mlreco/utils/groups.py | 8 ++++---- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/analysis/algorithms/selections/example_nue.py b/analysis/algorithms/selections/example_nue.py index 3f022776..3e3391b0 100644 --- a/analysis/algorithms/selections/example_nue.py +++ b/analysis/algorithms/selections/example_nue.py @@ -19,7 +19,14 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): predictor = FullChainEvaluator(data_blob, res, cfg, analysis_cfg) image_idxs = data_blob['index'] + print(data_blob['index'], data_blob['run_info']) for idx, index in enumerate(image_idxs): + index_dict = { + 'Index': index, + 'run': data_blob['run_info'][idx][0], + 'subrun': data_blob['run_info'][idx][1], + 'event': data_blob['run_info'][idx][2] + } # Process Interaction Level Information matches, counts = predictor.match_interactions(idx, @@ -75,7 +82,7 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): true_int_dict['true_nu_energy'] = nu.energy_init() pred_int_dict['interaction_match_counts'] = counts[i] - interactions_dict = OrderedDict({'Index': index}) + interactions_dict = OrderedDict(index_dict.copy()) interactions_dict.update(true_int_dict) interactions_dict.update(pred_int_dict) interactions.append(interactions_dict) @@ -89,7 +96,7 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): matched_particles, _, ious = match_particles_fn(true_particles, pred_particles) for i, m in enumerate(matched_particles): - particles_dict = OrderedDict({'Index': index}) + particles_dict = OrderedDict(index_dict.copy()) 
true_p, pred_p = m[0], m[1] pred_particle_dict = get_particle_properties(pred_p, vertex=pred_int.vertex, diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index d181d35a..061c5e29 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -130,7 +130,7 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, self.fm = FlashManager(cfg, flash_matching_cfg, meta=self.data_blob['meta'][0]) self.opflash_keys = opflash_keys - self.flash_matches = {} # key is volume, value is tuple (tpc_v, pmt_v, list of matches) + self.flash_matches = {} # key is (volume, use_true_tpc_objects), value is tuple (tpc_v, pmt_v, list of matches) # type is (list of Interaction/TruthInteraction, list of larcv::Flash, list of flashmatch::FlashMatch_t) @@ -155,7 +155,7 @@ def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): ======= list of tuple (Interaction, larcv::Flash, flashmatch::FlashMatch_t) """ - if volume not in self.flash_matches: + if (volume, use_true_tpc_objects) not in self.flash_matches: self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume) tpc_v, pmt_v, matches = self.flash_matches[volume] @@ -190,7 +190,7 @@ def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): input_pmt_v = self.fm.make_flash([self.data_blob[key][entry] for key in selected_opflash_keys]) matches = self.fm.run_flash_matching() - self.flash_matches[volume] = (tpc_v, pmt_v, matches) + self.flash_matches[(volume, use_true_tpc_objects)] = (tpc_v, pmt_v, matches) def _fit_predict_ppn(self, entry): ''' diff --git a/mlreco/utils/groups.py b/mlreco/utils/groups.py index bd998a8a..574b9d4f 100644 --- a/mlreco/utils/groups.py +++ b/mlreco/utils/groups.py @@ -275,19 +275,19 @@ def get_nu_id(cluster_event, particle_v, interaction_ids, particle_mpv=None): # if there is nu interaction if num_primary > 1: nu_id[inds] = 1 - else: + elif len(particle_mpv) > 0: # Find mpv particles is_mpv = np.zeros((len(particle_v),)) # mpv_ids = [p.id() for p in particle_mpv] - mpv_pdg = np.array([p.pdg_code() for p in particle_mpv]) - mpv_energy = np.array([p.energy_init() for p in particle_mpv]) + mpv_pdg = np.array([p.pdg_code() for p in particle_mpv]).reshape((-1,)) + mpv_energy = np.array([p.energy_init() for p in particle_mpv]).reshape((-1,)) for idx, part in enumerate(particle_v): # track_id - 1 in `particle_pcluster_tree` corresponds to id (or track_id) in `particle_mpv_tree` # if (part.track_id()-1) in mpv_ids or (part.ancestor_track_id()-1) in mpv_ids: # FIXME the above was wrong I think. close = np.isclose(part.energy_init()*1e-3, mpv_energy) pdg = part.pdg_code() == mpv_pdg - if close.any() and pdg.any() and (np.where(close)[0] == np.where(pdg)[0]).any(): + if (close & pdg).any(): is_mpv[idx] = 1. 
# else: # print("fake cosmic", part.pdg_code(), part.shape(), part.creation_process(), part.track_id(), part.ancestor_track_id(), mpv_ids) From 8b320a0052c2d570aa94c2fe234a73fd819dc388 Mon Sep 17 00:00:00 2001 From: Temigo Date: Tue, 8 Nov 2022 10:20:26 -0800 Subject: [PATCH 39/52] Forgot one fix --- analysis/classes/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index 061c5e29..6ab72311 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -158,7 +158,7 @@ def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): if (volume, use_true_tpc_objects) not in self.flash_matches: self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume) - tpc_v, pmt_v, matches = self.flash_matches[volume] + tpc_v, pmt_v, matches = self.flash_matches[(volume, use_true_tpc_objects)] return [(tpc_v[m.tpc_id], pmt_v[m.flash_id], m) for m in matches] def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): From 2024bd732ab21d24a832afcc296877dc00edcfc9 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 8 Nov 2022 14:11:04 -0800 Subject: [PATCH 40/52] Nomenclature change in GrapPA node kinematics loss output --- mlreco/models/layers/gnn/losses/node_kinematics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlreco/models/layers/gnn/losses/node_kinematics.py b/mlreco/models/layers/gnn/losses/node_kinematics.py index a8f815f2..c26592c4 100644 --- a/mlreco/models/layers/gnn/losses/node_kinematics.py +++ b/mlreco/models/layers/gnn/losses/node_kinematics.py @@ -326,9 +326,9 @@ def forward(self, out, types): result.update({ 'vtx_labels': vtx_labels, 'vtx_score_loss': vtx_score_loss/n_clusts_vtx if n_clusts_vtx else 0., - 'vtx_score_acc': vtx_score_acc/n_clusts_vtx if n_clusts_vtx else 1., + 'vtx_score_accuracy': vtx_score_acc/n_clusts_vtx if n_clusts_vtx else 1., 'vtx_position_loss': vtx_position_loss/n_clusts_vtx_pos if n_clusts_vtx_pos else 0., - 'vtx_position_acc': vtx_position_acc/n_clusts_vtx_pos if n_clusts_vtx_pos else 1. + 'vtx_position_accuracy': vtx_position_acc/n_clusts_vtx_pos if n_clusts_vtx_pos else 1. }) if self.use_anchor_points: result['vtx_anchors'] = vtx_anchors @@ -601,8 +601,8 @@ def forward(self, out, types, iteration=None): result.update({ 'vtx_position_loss': 0., 'vtx_score_loss': 0., - 'vtx_position_acc': 0., - 'vtx_score_acc': 0., + 'vtx_position_accurary': 0., + 'vtx_score_accuracy': 0., }) return result @@ -628,9 +628,9 @@ def forward(self, out, types, iteration=None): if compute_vtx: result.update({ 'vtx_score_loss': 0. if not n_clusts_vtx else vtx_score_loss/n_clusts_vtx, - 'vtx_score_acc': 0. if not n_clusts_vtx else vtx_score_acc/n_clusts_vtx, + 'vtx_score_accurary': 0. if not n_clusts_vtx else vtx_score_acc/n_clusts_vtx, 'vtx_position_loss': 0. if not n_clusts_vtx_positives else vtx_position_loss/n_clusts_vtx_positives, - 'vtx_position_acc': 0. if not n_clusts_vtx_positives else vtx_position_acc/n_clusts_vtx_positives, + 'vtx_position_accuray': 0. 
if not n_clusts_vtx_positives else vtx_position_acc/n_clusts_vtx_positives, }) return result From 2f998a3c11e30f6bfa628006e70288bcd9677eae Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 9 Nov 2022 11:42:39 -0800 Subject: [PATCH 41/52] Weird sys.path.insert causing circular imports removed --- mlreco/utils/cluster/dense_cluster.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mlreco/utils/cluster/dense_cluster.py b/mlreco/utils/cluster/dense_cluster.py index bd55fb3b..6e037020 100644 --- a/mlreco/utils/cluster/dense_cluster.py +++ b/mlreco/utils/cluster/dense_cluster.py @@ -1,22 +1,14 @@ import numpy as np import pandas as pd -import sys import os, re import torch import yaml import time -from scipy.spatial.distance import cdist from sklearn.metrics import adjusted_rand_score as ari -import argparse - -current_directory = os.path.dirname(os.path.abspath(__file__)) -current_directory = os.path.dirname(current_directory) -sys.path.insert(0, current_directory) from mlreco.utils.metrics import * from mlreco.trainval import trainval from mlreco.iotools.factories import loader_factory -from sklearn.cluster import DBSCAN from pprint import pprint From f4f99d5668ce5469180638e558def01e2dc7496e Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 9 Nov 2022 17:22:21 -0800 Subject: [PATCH 42/52] Fixed primary particle group labeling, added michel/delta to shower primaries --- mlreco/iotools/parsers/cluster.py | 9 ++-- mlreco/utils/groups.py | 70 ++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py index abc44f05..72216e61 100644 --- a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -1,7 +1,7 @@ from collections import OrderedDict import numpy as np from larcv import larcv -from mlreco.utils.groups import get_interaction_id, get_nu_id, get_particle_id, get_primary_id +from mlreco.utils.groups import get_interaction_id, get_nu_id, get_particle_id, get_shower_primary_id, get_group_primary_id from mlreco.utils.groups import type_labels as TYPE_LABELS from mlreco.iotools.parsers.sparse import parse_sparse3d from mlreco.iotools.parsers.particles import parse_particles @@ -138,15 +138,16 @@ def parse_cluster3d(cluster_event, labels['inter'] = inter_ids labels['nu'] = nu_ids labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) - labels['primary'] = get_primary_id(cluster_event, particles_v) + labels['primary_shower'] = get_shower_primary_id(cluster_event, particles_v) if add_kinematics_info: + primary_ids = get_group_primary_id(particles_v) particles_v = parse_particles(particle_event, cluster_event) labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) - labels['p'] = np.array([(p.px()**2+p.py()**2+p.pz()**2)/1e3 for p in particles_v]) + labels['p'] = np.array([p.p()/1e3 for p in particles_v]) # In GeV labels['vtx_x'] = np.array([p.ancestor_position().x() for p in particles_v]) labels['vtx_y'] = np.array([p.ancestor_position().y() for p in particles_v]) labels['vtx_z'] = np.array([p.ancestor_position().z() for p in particles_v]) - labels['primary_group'] = np.array([p.group_id()==p.parent_id() for p in particles_v], dtype=np.float32) + labels['primary_group'] = primary_ids labels['sem'] = np.array([p.shape() for p in particles_v]) # Loop over clusters, store info diff --git a/mlreco/utils/groups.py b/mlreco/utils/groups.py index a945fb03..4771bd13 100644 --- 
a/mlreco/utils/groups.py +++ b/mlreco/utils/groups.py @@ -357,9 +357,9 @@ def get_particle_id(particles_v, nu_ids, include_mpr=False): return particle_ids -def get_primary_id(cluster_event, particles_v): +def get_shower_primary_id(cluster_event, particles_v): ''' - Function that assigns valid primary tags. + Function that assigns valid primary tags to shower fragments. This could be handled somewhere else (e.g. SUPERA) Inputs: @@ -368,35 +368,35 @@ def get_primary_id(cluster_event, particles_v): Outputs: - array: (N) list of group ids ''' - # Only shower fragments that come first in time and deposit energy can be primaries - group_ids = np.array([p.group_id() for p in particles_v]) - primary_ids = np.empty(particles_v.size(), dtype=np.int32) + # Loop over the list of particles + group_ids = np.array([p.group_id() for p in particles_v]) + primary_ids = np.empty(particles_v.size(), dtype=np.int32) for i, p in enumerate(particles_v): - # If the particle is LE, not primary - if p.shape() == 4: + # If the particle is a track or a low energy cluster, it is not a primary shower fragment + if p.shape() == 1 or p.shape() == 4: primary_ids[i] = 0 continue - # If the particle is not EM, use default - gid = int(p.group_id()) - if p.shape() != 0: - primary_ids[i] = int(gid == i) + # If a particle is a Delta or a Michel, it is a primary shower fragment + if p.shape() == 2 or p.shape() == 3: + primary_ids[i] = 1 continue - # If the particle is nuclear activity, Delta or Michel, make it non primary + # If the shower fragment originates from nuclear activity, it is not a primary process = p.creation_process() parent_pdg_code = abs(p.parent_pdg_code()) - if 'Inelastic' in process or 'Capture' in process or parent_pdg_code == 13: + if 'Inelastic' in process or 'Capture' in process or parent_pdg_code == 2112: primary_ids[i] = 0 continue - # If a particle's parent fragment has size zero, make it non primary + # If a shower group's parent fragment has size zero, there is no valid primary in the group + gid = int(p.group_id()) parent_size = cluster_event.as_vector()[gid].as_vector().size() if not parent_size: primary_ids[i] = 0 continue - # If a particle's parent is not the first in time, make it non primary + # If a shower group's parent fragment is not the first in time, there is no valid primary in the group idxs = np.where(group_ids == gid)[0] clust_times = np.array([particles_v[int(j)].first_step().t() for j in idxs]) min_id = np.argmin(clust_times) @@ -404,7 +404,45 @@ def get_primary_id(cluster_event, particles_v): primary_ids[i] = 0 continue - # Use default otherwise + # If all conditions are met, label shower fragments which have identical ID and group ID as primary primary_ids[i] = int(gid == i) return primary_ids + + +def get_group_primary_id(particles_v): + ''' + Function that assigns valid primary tags to particle groups. + This could be handled somewhere else (e.g. 
SUPERA) + + Inputs: + - particles_v (array of larcv::Particle) : (N) LArCV Particle objects + Outputs: + - array: (N) list of group ids + ''' + # Loop over the list of particles + primary_ids = np.empty(particles_v.size(), dtype=np.int32) + for i, p in enumerate(particles_v): + # If the particle is not a shower or a track, it is not a primary + if p.shape() != 0 and p.shape() != 1: + primary_ids[i] = 0 + continue + + # If the particle group originates from nuclear activity, it is not a primary + gid = int(p.group_id()) + process = particles_v[gid].creation_process() + parent_pdg_code = abs(particles_v[gid].parent_pdg_code()) + ancestor_pdg_code = abs(particles_v[gid].ancestor_pdg_code()) + if 'Inelastic' in process or 'Capture' in process or parent_pdg_code == 2112 or ancestor_pdg_code == 2112: + primary_ids[i] = 0 + continue + + # If the parent is a pi0, make sure that it is a primary pi0 (pi0s are not stored in particle list) + if parent_pdg_code == 111 and ancestor_pdg_code != 111: + primary_ids[i] = 0 + continue + + # If the parent ID of the primary particle in the group is the same as the group ID, it is a primary + primary_ids[i] = int(particles_v[gid].parent_id() == gid) + + return primary_ids From 90a1b0c4aa4d743b20ac478d6f9e1473c16ea961 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 9 Nov 2022 23:02:50 -0800 Subject: [PATCH 43/52] Bug fix in training curve visualization --- mlreco/visualization/training.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/mlreco/visualization/training.py b/mlreco/visualization/training.py index b61b5770..40c7ed87 100644 --- a/mlreco/visualization/training.py +++ b/mlreco/visualization/training.py @@ -79,7 +79,7 @@ def find_key(df, key_list, separator=':'): return key, key_name -def get_training_df(log_dir, prefix='train'): +def get_training_df(log_dir, keys, prefix='train'): """ Finds all training log files inside the specified directory and concatenates them. 
If the range of iterations overlap, keep only @@ -90,6 +90,7 @@ def get_training_df(log_dir, prefix='train'): Args: log_dir (str): Path to the directory that contains the training log files + keys (list) : List of quantities of interest prefix (str) : Prefix shared between training file names (default: `train`) Returns: pandas.DataFrame: Combined training log data @@ -98,7 +99,15 @@ def get_training_df(log_dir, prefix='train'): end_points = np.array([int(f.split('-')[-1].split('.csv')[0]) for f in log_files]) order = np.argsort(end_points) end_points = np.append(end_points[order], 1e12) - return pd.concat([pd.read_csv(f, nrows=end_points[i+1]-end_points[i]) for i, f in enumerate(log_files[order])], sort=True) + log_dfs = [] + for i, f in enumerate(log_files[order]): + df = pd.read_csv(f, nrows=end_points[i+1]-end_points[i]) + for key_list in keys: + key, key_name = find_key(df, key_list) + df[key_name] = df[key] + log_dfs.append(df) + + return pd.concat(log_dfs, sort=True) def get_validation_df(log_dir, keys, prefix='inference'): @@ -228,7 +237,7 @@ def draw_training_curves(log_dir, models, metrics, dfs, val_dfs, colors = {}, {}, {} for i, key in enumerate(models): log_subdir = log_dir+key - dfs[key] = get_training_df(log_subdir, train_prefix) + dfs[key] = get_training_df(log_subdir, metrics, train_prefix) val_dfs[key] = get_validation_df(log_subdir, metrics, val_prefix) colors[key] = plotly_colors[i] From 90828578bfee36cc450afd1887d0bd66fda8be47 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 10 Nov 2022 08:25:33 -0800 Subject: [PATCH 44/52] Relabeling of deghosted points now uses Chebyshev distance 1 in DBSCAN (cheaper, reliable) --- mlreco/utils/deghosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlreco/utils/deghosting.py b/mlreco/utils/deghosting.py index 5540151b..f02b0e68 100644 --- a/mlreco/utils/deghosting.py +++ b/mlreco/utils/deghosting.py @@ -215,7 +215,7 @@ def adapt_labels_knn(result, label_seg, label_clustering, # for which cluster id and group id columns are 5 and 6 respectively. 
cluster_id_col = 5 track_label = 1 - dbscan = DBSCAN(eps=np.sqrt(3), min_samples=1) + dbscan = DBSCAN(eps=1.1, min_samples=1, metric='chebyshev') track_mask = label_c[:, -1] == track_label for batch_id in unique(coords[:, batch_column]): batch_mask = label_c[:, batch_column] == batch_id From 814c93aaa1dd47e13af9a9b6795db5097bb9fd51 Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 10 Nov 2022 09:21:40 -0800 Subject: [PATCH 45/52] Fix multibatch bug for flash matching + add missing light yield in QCluster_t --- analysis/algorithms/selections/__init__.py | 1 + analysis/classes/FlashManager.py | 10 ++++++---- analysis/classes/TruthInteraction.py | 4 ++++ analysis/classes/ui.py | 21 +++++++++++++-------- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/analysis/algorithms/selections/__init__.py b/analysis/algorithms/selections/__init__.py index 392f6a06..b4887760 100644 --- a/analysis/algorithms/selections/__init__.py +++ b/analysis/algorithms/selections/__init__.py @@ -3,3 +3,4 @@ from .michel_electrons import michel_electrons from .example_nue import debug_pid from .statistics import statistics +from .flash_matching import flash_matching diff --git a/analysis/classes/FlashManager.py b/analysis/classes/FlashManager.py index 4daf4a06..7ebcb73f 100644 --- a/analysis/classes/FlashManager.py +++ b/analysis/classes/FlashManager.py @@ -59,6 +59,8 @@ def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None): self.size_voxel_x = meta[6] self.size_voxel_y = meta[7] self.size_voxel_z = meta[8] + #print('Meta min = ', self.min_x, self.min_y, self.min_z) + #print('Meta size = ', self.size_voxel_x, self.size_voxel_y, self.size_voxel_z) # Setup flash matching print('Setting up OpT0Finder for flash matching...') @@ -104,7 +106,7 @@ def get_qcluster(self, tpc_id, array=False): raise Exception("TPC object %d does not exist in self.tpc_v" % tpc_id) - def make_qcluster(self, interactions): + def make_qcluster(self, interactions, use_depositions_MeV=False, ADC_to_MeV=1.): """ Make flashmatch::QCluster_t objects from list of interactions. 
@@ -138,7 +140,7 @@ def make_qcluster(self, interactions): p.points[i, 0] * self.size_voxel_x + self.min_x, p.points[i, 1] * self.size_voxel_y + self.min_y, p.points[i, 2] * self.size_voxel_z + self.min_z, - p.depositions[i]) + p.depositions[i]*ADC_to_MeV*self.det.LightYield() if not use_depositions_MeV else p.depositions_MeV[i]*self.det.LightYield()) # Add it to geoalgo::QCluster_t qcluster.push_back(qpoint) tpc_v.append(qcluster) @@ -188,12 +190,12 @@ def make_flash(self, larcv_flashes): self.pmt_v = pmt_v return pmt_v - def run_flash_matching(self, flashes=None, interactions=None): + def run_flash_matching(self, flashes=None, interactions=None, **kwargs): if self.tpc_v is None: if interactions is None: raise Exception('You need to specify `interactions`, or to run make_qcluster.') if interactions is not None: - self.make_qcluster(interactions) + self.make_qcluster(interactions, **kwargs) if self.pmt_v is None: diff --git a/analysis/classes/TruthInteraction.py b/analysis/classes/TruthInteraction.py index d76f863a..68a23450 100644 --- a/analysis/classes/TruthInteraction.py +++ b/analysis/classes/TruthInteraction.py @@ -12,6 +12,10 @@ def __init__(self, *args, **kwargs): super(TruthInteraction, self).__init__(*args, **kwargs) self.match = [] self._match_counts = {} + self.depositions_MeV = [] + for p in self.particles: + self.depositions_MeV.append(p.depositions_MeV) + self.depositions_MeV = np.hstack(self.depositions_MeV) def check_validity(self): for p in self.particles: diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index 6ab72311..ae3f9bc5 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -130,7 +130,7 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, self.fm = FlashManager(cfg, flash_matching_cfg, meta=self.data_blob['meta'][0]) self.opflash_keys = opflash_keys - self.flash_matches = {} # key is (volume, use_true_tpc_objects), value is tuple (tpc_v, pmt_v, list of matches) + self.flash_matches = {} # key is (entry, volume, use_true_tpc_objects), value is tuple (tpc_v, pmt_v, list of matches) # type is (list of Interaction/TruthInteraction, list of larcv::Flash, list of flashmatch::FlashMatch_t) @@ -138,7 +138,9 @@ def __repr__(self): msg = "FullChainEvaluator(num_images={})".format(int(self.num_images/self._num_volumes)) return msg - def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): + def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None, + use_depositions_MeV=False, + ADC_to_MeV=1.): """ If flash matches has not yet been computed for this volume, then it will be run as part of this function. 
Otherwise, flash matching results are @@ -155,13 +157,16 @@ def get_flash_matches(self, entry, use_true_tpc_objects=False, volume=None): ======= list of tuple (Interaction, larcv::Flash, flashmatch::FlashMatch_t) """ - if (volume, use_true_tpc_objects) not in self.flash_matches: - self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume) + if (entry, volume, use_true_tpc_objects) not in self.flash_matches: + self._run_flash_matching(entry, use_true_tpc_objects=use_true_tpc_objects, volume=volume, + use_depositions_MeV=use_depositions_MeV, ADC_to_MeV=ADC_to_MeV) - tpc_v, pmt_v, matches = self.flash_matches[(volume, use_true_tpc_objects)] + tpc_v, pmt_v, matches = self.flash_matches[(entry, volume, use_true_tpc_objects)] return [(tpc_v[m.tpc_id], pmt_v[m.flash_id], m) for m in matches] - def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): + def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None, + use_depositions_MeV=False, + ADC_to_MeV=1.): """ Parameters ========== @@ -178,7 +183,7 @@ def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): else: tpc_v = self.get_interactions(entry, drop_nonprimary_particles=False, volume=volume) - input_tpc_v = self.fm.make_qcluster(tpc_v) + input_tpc_v = self.fm.make_qcluster(tpc_v, use_depositions_MeV=use_depositions_MeV, ADC_to_MeV=ADC_to_MeV) selected_opflash_keys = self.opflash_keys if volume is not None: @@ -190,7 +195,7 @@ def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None): input_pmt_v = self.fm.make_flash([self.data_blob[key][entry] for key in selected_opflash_keys]) matches = self.fm.run_flash_matching() - self.flash_matches[(volume, use_true_tpc_objects)] = (tpc_v, pmt_v, matches) + self.flash_matches[(entry, volume, use_true_tpc_objects)] = (tpc_v, pmt_v, matches) def _fit_predict_ppn(self, entry): ''' From 2021ff7120be650689bf1d10298b05379b9369a5 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 10 Nov 2022 11:16:24 -0800 Subject: [PATCH 46/52] Added option to compute local direction and dedx from particle end point in GrapPA. Warning: all *start* parameters now deprecated. --- mlreco/models/grappa.py | 94 ++++++++++++++++++++----------------- mlreco/utils/gnn/cluster.py | 2 +- 2 files changed, 53 insertions(+), 43 deletions(-) diff --git a/mlreco/models/grappa.py b/mlreco/models/grappa.py index 38d8b74f..1631debd 100644 --- a/mlreco/models/grappa.py +++ b/mlreco/models/grappa.py @@ -36,27 +36,27 @@ class GNN(torch.nn.Module): .. code-block:: yaml base: - node_type : - node_min_size : - source_col : - target_col : - use_dbscan : - add_start_point : - add_start_dir : - start_dir_max_dist: - start_dir_opt : - add_start_dedx : - network : - edge_max_dist : - edge_dist_method : - merge_batch : - merge_batch_mode : - merge_batch_size : - shuffle_clusters : - kinematics_mlp : + source_col : + target_col : + node_type : + node_min_size : + add_points : + add_local_dirs : + dir_max_dist : + add_local_dedxs : + dedx_max_dist : + network : + edge_max_dist : + edge_dist_method: + merge_batch : + merge_batch_mode: + merge_batch_size: + shuffle_clusters: dbscan: dict + dictionary of dbscan parameters + node_encoder: dict .. 
code-block:: yaml @@ -128,21 +128,28 @@ def __init__(self, cfg, name='grappa', batch_col=0, coords_col=(1, 4)): # Get the chain input parameters base_config = cfg[name].get('base', {}) self.name = name + self.batch_index = batch_col + self.coords_index = coords_col # Choose what type of node to use - self.node_type = base_config.get('node_type', 0) - self.node_min_size = base_config.get('node_min_size', -1) - self.source_col = base_config.get('source_col', 5) - self.target_col = base_config.get('target_col', 6) - self.add_start_point = base_config.get('add_start_point', False) - self.add_start_dir = base_config.get('add_start_dir', False) - self.start_dir_max_dist = base_config.get('start_dir_max_dist', -1) - self.start_dir_opt = base_config.get('start_dir_opt', False) - self.add_start_dedx = base_config.get('add_start_dedx', False) + self.source_col = base_config.get('source_col', 5) + self.target_col = base_config.get('target_col', 6) + self.node_type = base_config.get('node_type', -1) + self.node_min_size = base_config.get('node_min_size', -1) + self.add_points = base_config.get('add_points', False) + self.add_local_dirs = base_config.get('add_local_dirs', False) + self.dir_max_dist = base_config.get('dir_max_dist', 5) + self.opt_dir_max_dist = self.dir_max_dist == 'optimize' + self.add_local_dedxs = base_config.get('add_local_dedxs', False) + self.dedx_max_dist = base_config.get('dedx_max_dist', 5) self.shuffle_clusters = base_config.get('shuffle_clusters', False) - self.batch_index = batch_col - self.coords_index = coords_col + # *Deprecated* but kept for backward compatibility: + if 'add_start_point' in base_config: self.add_points = base_config['add_start_point'] + if 'add_start_dir' in base_config: self.add_local_dirs = 'start' if base_config['add_start_dir'] else False + if 'add_start_dedx' in base_config: self.add_local_dedxs = 'start' if base_config['add_start_dedx'] else False + if 'start_dir_max_dist' in base_config: self.dir_max_dist = self.dedx_max_dist = base_config['start_dir_max_dist'] + if 'start_dir_opt' in base_config: self.opt_dir_max_dist = base_config['start_dir_opt'] # Interpret node type as list of classes to cluster, -1 means all classes if isinstance(self.node_type, int): self.node_type = [self.node_type] @@ -160,7 +167,6 @@ def __init__(self, cfg, name='grappa', batch_col=0, coords_col=(1, 4)): max_dist_mat[np.triu_indices(mat_size)] = self.edge_max_dist max_dist_mat += max_dist_mat.T - np.diag(np.diag(max_dist_mat)) self.edge_max_dist = max_dist_mat - print('edge_max_dist matrix', self.edge_max_dist) # If requested, merge images together within the batch self.merge_batch = base_config.get('merge_batch', False) @@ -357,21 +363,25 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, if extra_feats is not None: x = torch.cat([x, extra_feats.float()], dim=1) - # Add start point and/or start direction to node features if requested - if self.add_start_point or points is not None: + # Add end points and/or local directions to node features, if requested + if self.add_points or points is not None: if points is None: points = get_cluster_points_label(cluster_data, particles, clusts, coords_index=self.coords_index) x = torch.cat([x, points.float()], dim=1) - if self.add_start_dir: - dirs_start = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,:3], clusts, self.start_dir_max_dist, self.start_dir_opt) - dirs_end = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], 
points[:,3:6], clusts, self.start_dir_max_dist, self.start_dir_opt) - #x = torch.cat([x, dirs_start.float(), dirs_end.float()], dim=1) - x = torch.cat([x, dirs_start.float()], dim=1) - if self.add_start_dedx: - dedxs_start = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.start_dir_max_dist) - dedxs_end = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,3:6], clusts, self.start_dir_max_dist) - #x = torch.cat([x, dedxs_start.reshape(-1,1).float(), dedxs_end.reshape(-1,1).float()], dim=1) - x = torch.cat([x, dedxs_start.reshape(-1,1).float()], dim=1) + if self.add_local_dirs: + dirs_start = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,:3], clusts, self.dir_max_dist, self.opt_dir_max_dist) + if self.add_local_dirs != 'start': + dirs_end = get_cluster_directions(cluster_data[:, self.coords_index[0]:self.coords_index[1]], points[:,3:6], clusts, self.dir_max_dist, self.opt_dir_max_dist) + x = torch.cat([x, dirs_start.float(), dirs_end.float()], dim=1) + else: + x = torch.cat([x, dirs_start.float()], dim=1) + if self.add_local_dedxs: + dedxs_start = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.dedx_max_dir) + if self.add_local_dedxs != 'start': + dedxs_end = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,3:6], clusts, self.dedx_max_dir) + x = torch.cat([x, dedxs_start.reshape(-1,1).float(), dedxs_end.reshape(-1,1).float()], dim=1) + else: + x = torch.cat([x, dedxs_start.reshape(-1,1).float()], dim=1) # Bring edge_index and batch_ids to device index = torch.tensor(edge_index, device=cluster_data.device, dtype=torch.long) diff --git a/mlreco/utils/gnn/cluster.py b/mlreco/utils/gnn/cluster.py index d933ce93..e7b67f89 100644 --- a/mlreco/utils/gnn/cluster.py +++ b/mlreco/utils/gnn/cluster.py @@ -575,7 +575,7 @@ def cluster_direction(voxels: nb.float64[:,:], torch.tensor: (3) Orientation """ # If max_dist is set, limit the set of voxels to those within a sphere of radius max_dist - if max_dist > 0 and not optimize: + if not optimize and max_dist > 0: dist_mat = cdist_nb(start.reshape(1,-1), voxels).flatten() voxels = voxels[dist_mat <= max_dist] if len(voxels) < 2: From 7a8f915bb2b529eb85456b1d001d69072abfe106 Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 10 Nov 2022 15:31:26 -0800 Subject: [PATCH 47/52] Fix bug for volume=None --- analysis/classes/FlashManager.py | 8 ++-- analysis/classes/ui.py | 65 +++++++++++++++++++++++++++----- mlreco/iotools/collates.py | 25 ++++++++++++ 3 files changed, 84 insertions(+), 14 deletions(-) diff --git a/analysis/classes/FlashManager.py b/analysis/classes/FlashManager.py index 7ebcb73f..65c3fc81 100644 --- a/analysis/classes/FlashManager.py +++ b/analysis/classes/FlashManager.py @@ -145,8 +145,8 @@ def make_qcluster(self, interactions, use_depositions_MeV=False, ADC_to_MeV=1.): qcluster.push_back(qpoint) tpc_v.append(qcluster) - if self.tpc_v is not None: - print("Warning: overwriting internal list of particles.") + #if self.tpc_v is not None: + # print("Warning: overwriting internal list of particles.") self.tpc_v = tpc_v return tpc_v @@ -185,8 +185,8 @@ def make_flash(self, larcv_flashes): flash.pe_v.push_back(f.PEPerOpDet()[i + offset]) flash.pe_err_v.push_back(0.) 
pmt_v.append(flash) - if self.pmt_v is not None: - print("Warning: overwriting internal list of flashes.") + #if self.pmt_v is not None: + # print("Warning: overwriting internal list of flashes.") self.pmt_v = pmt_v return pmt_v diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index ae3f9bc5..c94d0d05 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -183,8 +183,18 @@ def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None, else: tpc_v = self.get_interactions(entry, drop_nonprimary_particles=False, volume=volume) + # If we are not running flash matching over the entire volume at once, + # then we need to shift the coordinates that will be used for flash matching + # back to the reference of the first volume. + if volume is not None: + for tpc_object in tpc_v: + tpc_object.points = self._untranslate(tpc_object.points, volume) input_tpc_v = self.fm.make_qcluster(tpc_v, use_depositions_MeV=use_depositions_MeV, ADC_to_MeV=ADC_to_MeV) + if volume is not None: + for tpc_object in tpc_v: + tpc_object.points = self._translate(tpc_object.points, volume) + # Now making Flash_t objects selected_opflash_keys = self.opflash_keys if volume is not None: assert isinstance(volume, int) @@ -194,6 +204,7 @@ def _run_flash_matching(self, entry, use_true_tpc_objects=False, volume=None, pmt_v.extend(self.data_blob[key][entry]) input_pmt_v = self.fm.make_flash([self.data_blob[key][entry] for key in selected_opflash_keys]) + # Running flash matching and caching the results matches = self.fm.run_flash_matching() self.flash_matches[(entry, volume, use_true_tpc_objects)] = (tpc_v, pmt_v, matches) @@ -517,11 +528,45 @@ def _check_volume(self, volume): assert isinstance(volume, (int, np.int64, np.int32)) and volume >= 0 def _translate(self, voxels, volume): - if self.vb is None: + """ + Go from 1-volume-only back to full volume coordinates + + Parameters + ========== + voxels: np.ndarray + Shape (N, 3) + volume: int + + Returns + ======= + np.ndarray + Shape (N, 3) + """ + if self.vb is None or volume is None: return voxels else: return self.vb.translate(voxels, volume) + def _untranslate(self, voxels, volume): + """ + Go from full volume to 1-volume-only coordinates + + Parameters + ========== + voxels: np.ndarray + Shape (N, 3) + volume: int + + Returns + ======= + np.ndarray + Shape (N, 3) + """ + if self.vb is None or volume is None: + return voxels + else: + return self.vb.untranslate(voxels, volume) + def get_fragments(self, entry, only_primaries=False, min_particle_voxel_count=-1, attaching_threshold=2, @@ -565,7 +610,7 @@ def get_fragments(self, entry, only_primaries=False, out_fragment_list = [] for entry in entries: - volume = entry % self._num_volumes + volume = entry % self._num_volumes if volume is not None else volume point_cloud = self.data_blob['input_data'][entry][:, 1:4] depositions = self.result['input_rescaled'][entry][:, 4] @@ -714,7 +759,7 @@ def get_particles(self, entry, only_primaries=True, out_particle_list = [] for entry in entries: - volume = entry % self._num_volumes + volume = entry % self._num_volumes if volume is not None else volume point_cloud = self.data_blob['input_data'][entry][:, 1:4] depositions = self.result['input_rescaled'][entry][:, 4] @@ -833,7 +878,7 @@ def get_interactions(self, entry, drop_nonprimary_particles=True, volume=None) - out_interaction_list = [] for e in entries: - volume = e % self._num_volumes + volume = e % self._num_volumes if volume is not None else volume particles = self.get_particles(entry, 
only_primaries=drop_nonprimary_particles, volume=volume) out = group_particles_to_interactions_fn(particles) for ia in out: @@ -1073,7 +1118,7 @@ def get_true_fragments(self, entry, verbose=False, volume=None) -> List[TruthPar out_fragments_list = [] for entry in entries: - volume = entry % self._num_volumes + volume = entry % self._num_volumes if volume is not None else volume # Both are "adapted" labels labels = self.data_blob['cluster_label'][entry] @@ -1185,7 +1230,7 @@ def get_true_particles(self, entry, only_primaries=True, out_particles_list = [] global_entry = entry for entry in entries: - volume = entry % self._num_volumes + volume = entry % self._num_volumes if volume is not None else volume labels = self.data_blob['cluster_label'][entry] if self.deghosting: @@ -1328,7 +1373,7 @@ def get_true_interactions(self, entry, drop_nonprimary_particles=True, entries = self._get_entries(entry, volume) out_interactions_list = [] for e in entries: - volume = e % self._num_volumes + volume = e % self._num_volumes if volume is not None else volume true_particles = self.get_true_particles(entry, only_primaries=drop_nonprimary_particles, volume=volume) out = group_particles_to_interactions_fn(true_particles, get_nu_id=True, mode='truth') @@ -1359,7 +1404,7 @@ def get_true_vertices(self, entry, volume=None): entries = self._get_entries(entry, volume) out = {} for entry in entries: - volume = entry % self._num_volumes + volume = entry % self._num_volumes if volume is not None else volume inter_idxs = np.unique( self.data_blob['cluster_label'][entry][:, 7].astype(int)) for inter_idx in inter_idxs: @@ -1394,7 +1439,7 @@ def match_particles(self, entry, entries = self._get_entries(entry, volume) all_matches = [] for e in entries: - volume = e % self._num_volumes + volume = e % self._num_volumes if volume is not None else volume if mode == 'pred_to_true': # Match each pred to one in true particles_from = self.get_particles(entry, only_primaries=only_primaries, volume=volume) @@ -1440,7 +1485,7 @@ def match_interactions(self, entry, mode='pred_to_true', entries = self._get_entries(entry, volume) all_matches, all_counts = [], [] for e in entries: - volume = e % self._num_volumes + volume = e % self._num_volumes if volume is not None else volume if mode == 'pred_to_true': ints_from = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) ints_to = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) diff --git a/mlreco/iotools/collates.py b/mlreco/iotools/collates.py index ee040b03..9a4770fa 100644 --- a/mlreco/iotools/collates.py +++ b/mlreco/iotools/collates.py @@ -131,6 +131,31 @@ def translate(self, voxels, volume): new_voxels[..., n] += int(self.shifts[n][self.combo[volume][n]]) return new_voxels + def untranslate(self, voxels, volume): + """ + Meant to reverse what the translate method does: for voxels coordinates initially in the range of full detector, + translate to the range of 1 volume for a specific volume given in argument. + + Parameters + ========== + voxels: np.ndarray + Expected shape is (D_0, ..., D_N, self.dim) with N >=0. In other words, voxels can be a list of + coordinate or a single coordinate with shape (d,). + volume: int + + Returns + ======= + np.ndarray + Translated voxels array, using internally computed shifts. 
+ """ + assert volume >= 0 and volume < self.num_volumes() + assert voxels.shape[-1] == self.dim + + new_voxels = voxels.copy() + for n in range(self.dim): + new_voxels[..., n] -= int(self.shifts[n][self.combo[volume][n]]) + return new_voxels + def split(self, voxels): """ Parameters From be9a192fdca56f520aec839bc4f947f137d49fd1 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Tue, 15 Nov 2022 10:33:53 -0800 Subject: [PATCH 48/52] Bug fix GNN full chain when using charge rescaling + gSPICE --- mlreco/models/layers/common/gnn_full_chain.py | 2 +- mlreco/visualization/training.py | 53 +++++++++++-------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/mlreco/models/layers/common/gnn_full_chain.py b/mlreco/models/layers/common/gnn_full_chain.py index 26d4a08d..f665b962 100644 --- a/mlreco/models/layers/common/gnn_full_chain.py +++ b/mlreco/models/layers/common/gnn_full_chain.py @@ -726,7 +726,7 @@ def forward(self, out, seg_label, ppn_label=None, cluster_label=None, kinematics segmentation_pred = out['segmentation'][0] - if self.enable_ghost and not self.enable_charge_rescaling: + if self.enable_ghost: segmentation_pred = segmentation_pred[deghost] if self._gspice_use_true_labels: gs_seg_label = torch.cat([cluster_label[0][:, :4], segment_label[:, None]], dim=1) diff --git a/mlreco/visualization/training.py b/mlreco/visualization/training.py index 40c7ed87..2e09ac75 100644 --- a/mlreco/visualization/training.py +++ b/mlreco/visualization/training.py @@ -156,7 +156,8 @@ def get_validation_df(log_dir, keys, prefix='inference'): def draw_training_curves(log_dir, models, metrics, limits={}, model_names={}, metric_names={}, - max_iter=-1, step=1, smoothing=1, print_min=False, print_max=False, + max_iter=-1, step=1, smoothing=1, iter_per_epoch=-1, + print_min=False, print_max=False, interactive=True, same_plot=True, paper=False, leg_ncols=1, figure_name='', train_prefix='train', val_prefix='inference'): """ @@ -164,22 +165,23 @@ def draw_training_curves(log_dir, models, metrics, directory and draws an evolution plot of the request quantities. Args: - log_dir (str) : Path to the directory that contains the folder with log files - models (list) : List of model (folder) names under the main directory - metrics (list) : List of quantities to draw - limits (list/dict) : List of y boundaries for the plot (or dictionary of y boundaries, one per metric) - model_names (dict) : Dictionary which maps raw model names to model labels (default: `{}`) - metric_names (dict): Dictionary which maps raw metric names to metric labels (default: `{}`) - max_iter (int) : Maximum number of interation to include in the plot (default: `-1`) - step (int) : Step between two successive iterations that are represented (default: `1`) - smoothing (int) : Number of iteration over which to average the metric value (default: `1`) - interactive (bool) : Use plotly to draw (default: `True`) - same_plot (bool) : Draw all model/metric pairs on a single plot (default: `True`) - paper (bool) : Format plot for paper, using latex (default: `False`) - leg_ncols (int) : Number of columns in the legend (default: `1`) - figure_name (str) : Name of the figure. 
If specified, figure is saved (default: `''`) - train_prefix (str) : Prefix shared between training file names (default: `train`) - val_prefix (str) : Prefix shared between validation file names (default: `inference`) + log_dir (str) : Path to the directory that contains the folder with log files + models (list) : List of model (folder) names under the main directory + metrics (list) : List of quantities to draw + limits (list/dict) : List of y boundaries for the plot (or dictionary of y boundaries, one per metric) + model_names (dict) : Dictionary which maps raw model names to model labels (default: `{}`) + metric_names (dict) : Dictionary which maps raw metric names to metric labels (default: `{}`) + max_iter (int) : Maximum number of interation to include in the plot (default: `-1`) + step (int) : Step between two successive iterations that are represented (default: `1`) + smoothing (int) : Number of iteration over which to average the metric value (default: `1`) + iter_per_epoch (float): Number of iterations to complete an epoch (default: `-1`, figures it out from train log) + interactive (bool) : Use plotly to draw (default: `True`) + same_plot (bool) : Draw all model/metric pairs on a single plot (default: `True`) + paper (bool) : Format plot for paper, using latex (default: `False`) + leg_ncols (int) : Number of columns in the legend (default: `1`) + figure_name (str) : Name of the figure. If specified, figure is saved (default: `''`) + train_prefix (str) : Prefix shared between training file names (default: `train`) + val_prefix (str) : Prefix shared between validation file names (default: `inference`) """ # Set the style plotly_colors = pcolors.convert_colors_to_same_type(pcolors.DEFAULT_PLOTLY_COLORS, 'tuple')[0] @@ -251,11 +253,18 @@ def draw_training_curves(log_dir, models, metrics, metric_train = dfs[key][metric][:max_iter:step] if smoothing == 1 else dfs[key][metric][:max_iter].rolling(smoothing, min_periods=1, center=True).mean()[::step] draw_val = bool(len(val_dfs[key]['iter'])) if draw_val: - mask_val = val_dfs[key]['iter'] < max_iter if max_iter > -1 else val_dfs[key]['iter'] < 1e12 - iter_val = val_dfs[key]['iter'][mask_val] - epoch_val = [float(dfs[key]['epoch'][dfs[key]['iter'] == it]) for it in iter_val] - metricm_val = val_dfs[key][metric_name+'_mean'][mask_val] - metrice_val = val_dfs[key][metric_name+'_err'][mask_val] + mask_val = val_dfs[key]['iter'] < max_iter if max_iter > -1 else val_dfs[key]['iter'] < 1e12 + iter_val = val_dfs[key]['iter'][mask_val] + if iter_per_epoch < 0: + epoch_val = [dfs[key]['epoch'][dfs[key]['iter'] == it] for it in iter_val] + epoch_val = np.array([float(e) if len(e)==1 else -1 for e in epoch_val]) + mask_val &= epoch_val > -1 + iter_val = iter_val[epoch_val > -1] + epoch_val = epoch_val[epoch_val > -1] + else: + epoch_val = iter_val/iter_per_epoch + metricm_val = val_dfs[key][metric_name+'_mean'][mask_val] + metrice_val = val_dfs[key][metric_name+'_err'][mask_val] # Pick a label for this specific model/metric pair if not same_plot: From 354e5ee71b6ee0757c45eb3394722a52a8d0ffc3 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Wed, 16 Nov 2022 23:25:14 -0800 Subject: [PATCH 49/52] Add option to exclude MPR particles from the primary target --- mlreco/iotools/parsers/cluster.py | 14 ++++++++++---- mlreco/utils/groups.py | 10 +++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/mlreco/iotools/parsers/cluster.py b/mlreco/iotools/parsers/cluster.py index 72216e61..6659d94f 100644 --- 
a/mlreco/iotools/parsers/cluster.py +++ b/mlreco/iotools/parsers/cluster.py @@ -63,7 +63,8 @@ def parse_cluster3d(cluster_event, add_kinematics_info = False, clean_data = True, precedence = [1,2,0,3,4], - type_include_mpr = False): + type_include_mpr = False, + primary_include_mpr = True): """ a function to retrieve a 3D clusters tensor @@ -82,6 +83,8 @@ def parse_cluster3d(cluster_event, add_kinematics_info: false clean_data: true precedence: [1,2,0,3,4] + type_include_mpr: false + primary_include_mpr: true Configuration ------------- @@ -94,6 +97,8 @@ def parse_cluster3d(cluster_event, add_kinematics_info: bool clean_data: bool precedence: list + type_include_mpr: bool + primary_include_mpr: bool Returns ------- @@ -140,7 +145,7 @@ def parse_cluster3d(cluster_event, labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) labels['primary_shower'] = get_shower_primary_id(cluster_event, particles_v) if add_kinematics_info: - primary_ids = get_group_primary_id(particles_v) + primary_ids = get_group_primary_id(particles_v, nu_ids, include_mpr=primary_include_mpr) particles_v = parse_particles(particle_event, cluster_event) labels['type'] = get_particle_id(particles_v, nu_ids, include_mpr=type_include_mpr) labels['p'] = np.array([p.p()/1e3 for p in particles_v]) # In GeV @@ -202,10 +207,11 @@ def parse_cluster3d_charge_rescaled(cluster_event, add_kinematics_info = False, clean_data = True, precedence = [1,2,0,3,4], - type_include_mpr = False): + type_include_mpr = False, + primary_include_mpr = False): # Produces cluster3d labels with sparse3d_reco_rescaled on the fly on datasets that do not have it np_voxels, np_features = parse_cluster3d(cluster_event, particle_event, particle_mpv_event, sparse_semantics_event, None, - add_particle_info, add_kinematics_info, clean_data, precedence, type_include_mpr) + add_particle_info, add_kinematics_info, clean_data, precedence, type_include_mpr, primary_include_mpr) from .sparse import parse_sparse3d_charge_rescaled _, val_features = parse_sparse3d_charge_rescaled(sparse_value_event_list) diff --git a/mlreco/utils/groups.py b/mlreco/utils/groups.py index 4771bd13..7c948234 100644 --- a/mlreco/utils/groups.py +++ b/mlreco/utils/groups.py @@ -333,6 +333,7 @@ def get_particle_id(particles_v, nu_ids, include_mpr=False): Inputs: - particles_v (array of larcv::Particle) : (N) LArCV Particle objects - nu_ids: a numpy array with shape (n, 1) where 1 is neutrino id (0 if not an MPV) + - include_mpr: include MPR (cosmic-like) particles to PID target Outputs: - array: (N) list of group ids ''' @@ -410,19 +411,26 @@ def get_shower_primary_id(cluster_event, particles_v): return primary_ids -def get_group_primary_id(particles_v): +def get_group_primary_id(particles_v, nu_ids=None, include_mpr=True): ''' Function that assigns valid primary tags to particle groups. This could be handled somewhere else (e.g. 
SUPERA) Inputs: - particles_v (array of larcv::Particle) : (N) LArCV Particle objects + - nu_ids: a numpy array with shape (n, 1) where 1 is neutrino id (0 if not an MPV) + - include_mpr: include MPR (cosmic-like) particles to primary target Outputs: - array: (N) list of group ids ''' # Loop over the list of particles primary_ids = np.empty(particles_v.size(), dtype=np.int32) for i, p in enumerate(particles_v): + # If MPR particles are not included and the nu_id < 1, assign invalid + if not include_mpr and nu_ids[i] < 1: + primary_ids[i] = -1 + continue + # If the particle is not a shower or a track, it is not a primary if p.shape() != 0 and p.shape() != 1: primary_ids[i] = 0 From 6afb611e96db053b50948167d0644f9beaa0659a Mon Sep 17 00:00:00 2001 From: Temigo Date: Thu, 17 Nov 2022 10:43:13 -0800 Subject: [PATCH 50/52] Volume bug fix + reflash merging option --- analysis/algorithms/selections/example_nue.py | 41 +++- .../algorithms/selections/flash_matching.py | 228 ++++++++++++++++++ analysis/algorithms/selections/statistics.py | 2 +- .../selections/through_going_muons.py | 4 +- analysis/classes/FlashManager.py | 49 +++- analysis/classes/ui.py | 37 +-- 6 files changed, 339 insertions(+), 22 deletions(-) create mode 100644 analysis/algorithms/selections/flash_matching.py diff --git a/analysis/algorithms/selections/example_nue.py b/analysis/algorithms/selections/example_nue.py index 3e3391b0..5474d11f 100644 --- a/analysis/algorithms/selections/example_nue.py +++ b/analysis/algorithms/selections/example_nue.py @@ -9,6 +9,12 @@ import time import numpy as np +# Setup OpT0finder +import os, sys +sys.path.append('/sdf/group/neutrino/ldomine/OpT0Finder/python') +import flashmatch +from flashmatch import flashmatch, geoalgo + @evaluate(['interactions', 'particles'], mode='per_batch') def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): @@ -16,8 +22,19 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): interactions, particles = [], [] deghosting = analysis_cfg['analysis']['deghosting'] primaries = analysis_cfg['analysis']['match_primaries'] + enable_flash_matching = analysis_cfg['analysis'].get('enable_flash_matching', False) + ADC_to_MeV = analysis_cfg['analysis'].get('ADC_to_MeV', 1./350.) 
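# Editor's illustration (not part of the patch): a minimal sketch of the `analysis` block that
# would satisfy the configuration lookups used above. All key names are taken from the
# analysis_cfg accesses shown in this selection script; the values are hypothetical placeholders
# and would normally live in the analysis configuration file.
analysis_cfg_example = {
    'analysis': {
        'deghosting': True,               # remove ghost points before building reco objects
        'match_primaries': True,          # used when matching reco interactions to truth
        'enable_flash_matching': True,    # turn on the OpT0Finder-based matching set up here
        'ADC_to_MeV': 1. / 350.,          # charge-to-energy scale handed to get_flash_matches
        'processor_cfg': {}               # forwarded to FullChainEvaluator
    }
}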
+ + processor_cfg = analysis_cfg['analysis'].get('processor_cfg', {}) + if enable_flash_matching: + predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, + deghosting=deghosting, + enable_flash_matching=True, + flash_matching_cfg=os.path.join(os.environ['FMATCH_BASEDIR'], "dat/flashmatch_112022.cfg"), + opflash_keys=['opflash_cryoE', 'opflash_cryoW']) + else: + predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg) - predictor = FullChainEvaluator(data_blob, res, cfg, analysis_cfg) image_idxs = data_blob['index'] print(data_blob['index'], data_blob['run_info']) for idx, index in enumerate(image_idxs): @@ -27,6 +44,11 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): 'subrun': data_blob['run_info'][idx][1], 'event': data_blob['run_info'][idx][2] } + if enable_flash_matching: + flash_matches_cryoE = predictor.get_flash_matches(idx, use_true_tpc_objects=False, volume=0, + use_depositions_MeV=False, ADC_to_MeV=ADC_to_MeV) + flash_matches_cryoW = predictor.get_flash_matches(idx, use_true_tpc_objects=False, volume=1, + use_depositions_MeV=False, ADC_to_MeV=ADC_to_MeV) # Process Interaction Level Information matches, counts = predictor.match_interactions(idx, @@ -82,6 +104,23 @@ def debug_pid(data_blob, res, data_idx, analysis_cfg, cfg): true_int_dict['true_nu_energy'] = nu.energy_init() pred_int_dict['interaction_match_counts'] = counts[i] + + if enable_flash_matching: + volume = true_int.volume if true_int is not None else pred_int.volume + flash_matches = flash_matches_cryoW if volume == 1 else flash_matches_cryoE + pred_int_dict['fmatched'] = False + pred_int_dict['fmatch_time'] = None + pred_int_dict['fmatch_total_pe'] = None + pred_int_dict['fmatch_id'] = None + if pred_int is not None: + for interaction, flash, match in flash_matches: + if interaction.id != pred_int.id: continue + pred_int_dict['fmatched'] = True + pred_int_dict['fmatch_time'] = flash.time() + pred_int_dict['fmatch_total_pe'] = flash.TotalPE() + pred_int_dict['fmatch_id'] = flash.id() + break + interactions_dict = OrderedDict(index_dict.copy()) interactions_dict.update(true_int_dict) interactions_dict.update(pred_int_dict) diff --git a/analysis/algorithms/selections/flash_matching.py b/analysis/algorithms/selections/flash_matching.py new file mode 100644 index 00000000..e9d087ce --- /dev/null +++ b/analysis/algorithms/selections/flash_matching.py @@ -0,0 +1,228 @@ +from collections import OrderedDict +from analysis.algorithms.utils import count_primary_particles, get_particle_properties +from analysis.classes.ui import FullChainEvaluator + +from analysis.decorator import evaluate +from analysis.classes.particle import match_particles_fn, matrix_iou + +from pprint import pprint +import time +import numpy as np +import os, sys + +# Setup OpT0finder +sys.path.append('/sdf/group/neutrino/ldomine/OpT0Finder/python') +import flashmatch +from flashmatch import flashmatch, geoalgo + + +def find_true_time(interaction): + """ + Returns + ======= + Time in us + """ + time = None + for p in interaction.particles: + if not p.is_primary: continue + time = 1e-3 * p.asis.ancestor_t() if time is None else min(time, 1e-3 * p.particle_asis.ancestor_t()) + return time + +def find_true_x(interaction): + """ + Returns + ======= + True vertex x in cm (absolute coordinates) + """ + x = [] + for p in interaction.particles: + if not p.is_primary: continue + x.append(p.asis.x()) + if len(x) == 0: + return None + values, counts = np.unique(x, return_counts=True) + if len(values) > 1: + print("Warning found > 1 
true x in interaction", values, counts) + return values[np.argmax(counts)] + + +@evaluate(['interactions', 'flashes', 'matches'], mode='per_batch') +def flash_matching(data_blob, res, data_idx, analysis_cfg, cfg): + + interactions, flashes, matches = [], [], [] + deghosting = analysis_cfg['analysis']['deghosting'] + primaries = analysis_cfg['analysis']['drop_nonprimary_particles'] + use_true_tpc_objects = analysis_cfg['analysis'].get('use_true_tpc_objects', False) + use_depositions_MeV = analysis_cfg['analysis'].get('use_depositions_MeV', False) + ADC_to_MeV = analysis_cfg['analysis'].get('ADC_to_MeV', 1./350.) + + processor_cfg = analysis_cfg['analysis'].get('processor_cfg', {}) + predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, + deghosting=deghosting, + enable_flash_matching=True, + flash_matching_cfg=os.path.join(os.environ['FMATCH_BASEDIR'], "dat/flashmatch_112022.cfg"), + opflash_keys=['opflash_cryoE', 'opflash_cryoW']) + + image_idxs = data_blob['index'] + print(data_idx, data_blob['index'], data_blob['run_info']) + for idx, index in enumerate(image_idxs): + index_dict = { + 'Index': index, + 'run': data_blob['run_info'][idx][0], + 'subrun': data_blob['run_info'][idx][1], + 'event': data_blob['run_info'][idx][2] + } + meta = data_blob['meta'][idx] + + all_times_cryoE, all_times_cryoW = [], [] + for flash in data_blob['opflash_cryoE'][idx]: + all_times_cryoE.append(flash.time()) + for flash in data_blob['opflash_cryoW'][idx]: + all_times_cryoW.append(flash.time()) + ordered_flashes_cryoE = np.array(data_blob['opflash_cryoE'][idx])[np.argsort(all_times_cryoE)] + ordered_flashes_cryoW = np.array(data_blob['opflash_cryoW'][idx])[np.argsort(all_times_cryoW)] + + prev_flash_time, next_flash_time = {}, {} + for flash_idx, flash in enumerate(ordered_flashes_cryoE): + if flash_idx > 0: + prev_flash_time[(0, flash.id())] = ordered_flashes_cryoE[flash_idx-1].time() + else: + prev_flash_time[(0, flash.id())] = None + if flash_idx < len(ordered_flashes_cryoE)-1: + next_flash_time[(0, flash.id())] = ordered_flashes_cryoE[flash_idx+1].time() + else: + next_flash_time[(0, flash.id())] = None + for flash_idx, flash in enumerate(ordered_flashes_cryoW): + if flash_idx > 0: + prev_flash_time[(1, flash.id())] = ordered_flashes_cryoW[flash_idx-1].time() + else: + prev_flash_time[(1, flash.id())] = None + if flash_idx < len(ordered_flashes_cryoW)-1: + next_flash_time[(1, flash.id())] = ordered_flashes_cryoW[flash_idx+1].time() + else: + next_flash_time[(1, flash.id())] = None + + flash_matches_cryoE = predictor.get_flash_matches(idx, use_true_tpc_objects=use_true_tpc_objects, volume=0, + use_depositions_MeV=use_depositions_MeV, ADC_to_MeV=ADC_to_MeV) + flash_matches_cryoW = predictor.get_flash_matches(idx, use_true_tpc_objects=use_true_tpc_objects, volume=1, + use_depositions_MeV=use_depositions_MeV, ADC_to_MeV=ADC_to_MeV) + + matched_interactions = None + if not use_true_tpc_objects: + matched_interactions = predictor.match_interactions(idx, + mode='pred_to_true', drop_nonprimary_particles=primaries, match_particles=True) + + interaction_ids, flash_ids = [], [] + for interaction, flash, match in flash_matches_cryoE + flash_matches_cryoW: + interaction_ids.append(interaction.id) + flash_ids.append(flash.id()) + + interaction_dict = OrderedDict(index_dict.copy()) + + interaction_dict['interaction_id'] = interaction.id + interaction_dict['size'] = interaction.size + interaction_dict['num_particles'] = interaction.num_particles + interaction_dict['interaction_min_x'] = interaction.points[:, 
0].min() + interaction_dict['interaction_max_x'] = interaction.points[:, 0].max() + interaction_dict['interaction_min_y'] = interaction.points[:, 1].min() + interaction_dict['interaction_max_y'] = interaction.points[:, 1].max() + interaction_dict['interaction_min_z'] = interaction.points[:, 2].min() + interaction_dict['interaction_max_z'] = interaction.points[:, 2].max() + interaction_dict['interaction_edep'] = interaction.depositions.sum() + interaction_dict['fmatched'] = True + interaction_dict['volume'] = interaction.volume + + if not use_true_tpc_objects: # Using TruthInteraction + for pred_int, true_int in matched_interactions: + if pred_int.id != interaction.id: continue + if true_int is None: + interaction_dict['matched'] = False + interaction_dict['true_time'] = None + interaction_dict['true_x'] = None + else: + interaction_dict['matched'] = True + interaction_dict['true_time'] = find_true_time(true_int) + interaction_dict['true_x'] = find_true_x(true_int) + else: + interaction_dict['true_time'] = find_true_time(interaction) + interaction_dict['true_x'] = find_true_x(interaction) + interaction_dict['interaction_edep_MeV'] = interaction.depositions_MeV.sum() + + flash_dict = OrderedDict(index_dict.copy()) + + flash_dict['flash_id'] = flash.id() + flash_dict['time'] = flash.time() + flash_dict['total_pe'] = flash.TotalPE() + flash_dict['abstime'] = flash.absTime() + flash_dict['time_width'] = flash.timeWidth() + flash_dict['fmatched'] = True + flash_dict['volume'] = interaction.volume + flash_dict['prev_flash_time'] = prev_flash_time[(interaction.volume, flash.id())] + flash_dict['next_flash_time'] = next_flash_time[(interaction.volume, flash.id())] + + interactions.append(interaction_dict) + flashes.append(flash_dict) + match_dict = flash_dict.copy() + match_dict.update(interaction_dict) + match_dict['fmatch_score'] = match.score + # Convert from absolute cm to voxel coordinates + match_dict['fmatch_x'] = (match.tpc_point.x - meta[0]) / meta[6] + match_dict['hypothesis_total_pe'] = np.sum(match.hypothesis) + matches.append(match_dict) + + if use_true_tpc_objects: + all_interactions = predictor.get_true_interactions(idx, drop_nonprimary_particles=primaries) + else: + all_interactions = predictor.get_interactions(idx, drop_nonprimary_particles=primaries) + + for interaction in all_interactions: + if interaction.id in interaction_ids: continue + + interaction_dict = OrderedDict(index_dict.copy()) + interaction_dict['interaction_id'] = interaction.id + interaction_dict['size'] = interaction.size + interaction_dict['num_particles'] = interaction.num_particles + interaction_dict['interaction_min_x'] = interaction.points[:, 0].min() + interaction_dict['interaction_max_x'] = interaction.points[:, 0].max() + interaction_dict['interaction_min_y'] = interaction.points[:, 1].min() + interaction_dict['interaction_max_y'] = interaction.points[:, 1].max() + interaction_dict['interaction_min_z'] = interaction.points[:, 2].min() + interaction_dict['interaction_max_z'] = interaction.points[:, 2].max() + interaction_dict['interaction_edep'] = interaction.depositions.sum() + interaction_dict['fmatched'] = False + + if not use_true_tpc_objects: # Using TruthInteraction + for pred_int, true_int in matched_interactions: + if pred_int.id != interaction.id: continue + if true_int is None: + interaction_dict['matched'] = False + interaction_dict['true_time'] = None + interaction_dict['true_x'] = None + else: + interaction_dict['matched'] = True + interaction_dict['true_time'] = find_true_time(true_int) + 
interaction_dict['true_x'] = find_true_x(true_int) + else: + interaction_dict['true_time'] = find_true_time(interaction) + interaction_dict['true_x'] = find_true_x(interaction) + interaction_dict['interaction_edep_MeV'] = interaction.depositions_MeV.sum() + interactions.append(interaction_dict) + + volume = [0] * len(data_blob['opflash_cryoE'][idx]) + volume += [1] * len(data_blob['opflash_cryoW'][idx]) + for flash_idx, flash in enumerate(data_blob['opflash_cryoE'][idx] + data_blob['opflash_cryoW'][idx]): + if flash.id() in flash_ids: continue + flash_dict = OrderedDict(index_dict.copy()) + + flash_dict['flash_id'] = flash.id() + flash_dict['time'] = flash.time() + flash_dict['total_pe'] = flash.TotalPE() + flash_dict['abstime'] = flash.absTime() + flash_dict['time_width'] = flash.timeWidth() + flash_dict['fmatched'] = False + flash_dict['volume'] = volume[flash_idx] + flash_dict['prev_flash_time'] = prev_flash_time[(volume[flash_idx], flash.id())] + flash_dict['next_flash_time'] = next_flash_time[(volume[flash_idx], flash.id())] + flashes.append(flash_dict) + + return [interactions, flashes, matches] #[interactions, flashes] diff --git a/analysis/algorithms/selections/statistics.py b/analysis/algorithms/selections/statistics.py index c0b5db09..dd1de187 100644 --- a/analysis/algorithms/selections/statistics.py +++ b/analysis/algorithms/selections/statistics.py @@ -28,7 +28,7 @@ def statistics(data_blob, res, data_idx, analysis_cfg, cfg): bin_size = processor_cfg.get('bin_size', 17) # 5cm # Initialize analysis differently depending on data/MC setting - predictor = FullChainPredictor(data_blob, res, cfg, analysis_cfg, deghosting=deghosting) + predictor = FullChainPredictor(data_blob, res, cfg, processor_cfg, deghosting=deghosting) image_idxs = data_blob['index'] pca = PCA(n_components=2) diff --git a/analysis/algorithms/selections/through_going_muons.py b/analysis/algorithms/selections/through_going_muons.py index 589ca9e9..f91ba11f 100644 --- a/analysis/algorithms/selections/through_going_muons.py +++ b/analysis/algorithms/selections/through_going_muons.py @@ -123,9 +123,9 @@ def through_going_muons(data_blob, res, data_idx, analysis_cfg, cfg): # # Initialize analysis differently depending on data/MC setting if not data: - predictor = FullChainEvaluator(data_blob, res, cfg, analysis_cfg, deghosting=deghosting) + predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, deghosting=deghosting) else: - predictor = FullChainPredictor(data_blob, res, cfg, analysis_cfg, deghosting=deghosting) + predictor = FullChainPredictor(data_blob, res, cfg, processor_cfg, deghosting=deghosting) image_idxs = data_blob['index'] diff --git a/analysis/classes/FlashManager.py b/analysis/classes/FlashManager.py index 65c3fc81..9c9a0cd8 100644 --- a/analysis/classes/FlashManager.py +++ b/analysis/classes/FlashManager.py @@ -1,4 +1,5 @@ import os, sys +import numpy as np class FlashManager: @@ -7,7 +8,7 @@ class FlashManager: See https://github.com/drinkingkazu/OpT0Finder for more details about it. """ - def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None): + def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None, reflash_merging_window=None): """ Expects that the environment variable `FMATCH_BASEDIR` is set. 
You can either set it by hand (to the path where one can find @@ -80,6 +81,8 @@ def __init__(self, cfg, cfg_fmatch, meta=None, detector_specs=None): self.all_matches = None self.pmt_v, self.tpc_v = None, None + self.reflash_merging_window = reflash_merging_window + def get_flash(self, flash_id, array=False): from flashmatch import flashmatch @@ -166,12 +169,13 @@ def make_flash(self, larcv_flashes): for branch in larcv_flashes: flashes.extend(branch) - pmt_v = [] + pmt_v, times = [], [] for idx, f in enumerate(flashes): # f is an object of type larcv::Flash flash = flashmatch.Flash_t() flash.idx = f.id() # Assign a unique index flash.time = f.time() # Flash timing, a candidate T0 + times.append(flash.time) # Assign the flash position and error on this position flash.x, flash.y, flash.z = 0, 0, 0 @@ -187,9 +191,50 @@ def make_flash(self, larcv_flashes): pmt_v.append(flash) #if self.pmt_v is not None: # print("Warning: overwriting internal list of flashes.") + if self.reflash_merging_window is not None: + # then proceed to merging close flashes + perm = np.argsort(times) + pmt_v = np.array(pmt_v)[perm] + final_pmt_v = [pmt_v[0]] + is_merging = False + for idx, flash in enumerate(pmt_v[1:]): + if flash.time - final_pmt_v[-1].time < self.reflash_merging_window: + new_flash = self.merge_flashes(flash, final_pmt_v[-1]) + final_pmt_v[-1] = new_flash + else: + final_pmt_v.append(flash) + pmt_v = final_pmt_v + self.pmt_v = pmt_v return pmt_v + def merge_flashes(self, a, b): + """ + Util to merge 2 flashmatch::Flash_t objects on the fly. + + Final time is minimum of both times. Final PE count per + photodetectors is the sum between the 2 flashes. + + Parameters + ========== + a: flashmatch::Flash_t + b: flashmatch::Flash_t + + Returns + ======= + flashmatch::Flash_t + """ + from flashmatch import flashmatch + flash = flashmatch.Flash_t() + flash.idx = min(a.idx, b.idx) + flash.time = min(a.time, b.time) + flash.x, flash.y, flash.z = min(a.x, b.x), min(a.y, b.y), min(a.z, b.z) + flash.x_err, flash.y_err, flash.z_err = min(a.x_err, b.x_err), min(a.y_err, b.y_err), min(a.z_err, b.z_err) + for i in range(180): + flash.pe_v.push_back(a.pe_v[i] + b.pe_v[i]) + flash.pe_err_v.push_back(a.pe_err_v[i] + b.pe_err_v[i]) + return flash + def run_flash_matching(self, flashes=None, interactions=None, **kwargs): if self.tpc_v is None: if interactions is None: diff --git a/analysis/classes/ui.py b/analysis/classes/ui.py index c94d0d05..2f16fa34 100644 --- a/analysis/classes/ui.py +++ b/analysis/classes/ui.py @@ -73,7 +73,7 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, self.num_images = len(data_blob['input_data']) self.index = self.data_blob['index'] - self.spatial_size = predictor_cfg.get('spatial_size', 768) + # self.spatial_size = predictor_cfg.get('spatial_size', 768) # For matching particles and interactions self.min_overlap_count = predictor_cfg.get('min_overlap_count', 0) # Idem, can be 'count' or 'iou' @@ -89,6 +89,8 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, self.batch_mask = self.data_blob['input_data'] + # This is used to apply fiducial volume cuts. + # Min/max boundaries in each dimension haev to be specified. 
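# Editor's illustration (not part of the patch): one way per-dimension min/max boundaries can be
# turned into a fiducial-volume mask. The [min, max]-per-axis format and the numbers below are
# assumptions made for this sketch only; when nothing is specified, the class falls back to the
# ICARUS Cryo 0 default defined just below.
import numpy as np
boundaries = np.array([[0., 350.], [0., 350.], [0., 750.]])   # hypothetical [min, max] per axis
points = np.array([[10., 20., 30.], [400., 20., 30.]])        # two example 3D points
fiducial_mask = np.all((points >= boundaries[:, 0]) & (points <= boundaries[:, 1]), axis=1)
# fiducial_mask -> array([ True, False]): only the first point lies inside the volume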
self.volume_boundaries = predictor_cfg.get('volume_boundaries', None) if self.volume_boundaries is None: # Using ICARUS Cryo 0 as a default @@ -108,6 +110,8 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, # Determine whether we need to account for several distinct volumes # split over "virtual" batch ids + # Note this is different from "self.volume_boundaries" above + # FIXME rename one or the other to be clearer boundaries = cfg['iotool'].get('collate', {}).get('boundaries', None) if boundaries is not None: self.vb = VolumeBoundaries(boundaries) @@ -120,6 +124,8 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, self.enable_flash_matching = enable_flash_matching self.fm = None if enable_flash_matching: + reflash_merging_window = predictor_cfg.get('reflash_merging_window', None) + if 'meta' not in self.data_blob: raise Exception('Meta unspecified in data_blob. Please add it to your I/O schema.') #if 'FMATCH_BASEDIR' not in os.environ: @@ -127,7 +133,7 @@ def __init__(self, data_blob, result, cfg, predictor_cfg={}, deghosting=False, assert os.path.exists(flash_matching_cfg) assert len(opflash_keys) == self._num_volumes - self.fm = FlashManager(cfg, flash_matching_cfg, meta=self.data_blob['meta'][0]) + self.fm = FlashManager(cfg, flash_matching_cfg, meta=self.data_blob['meta'][0], reflash_merging_window=reflash_merging_window) self.opflash_keys = opflash_keys self.flash_matches = {} # key is (entry, volume, use_true_tpc_objects), value is tuple (tpc_v, pmt_v, list of matches) @@ -610,7 +616,7 @@ def get_fragments(self, entry, only_primaries=False, out_fragment_list = [] for entry in entries: - volume = entry % self._num_volumes if volume is not None else volume + volume = entry % self._num_volumes point_cloud = self.data_blob['input_data'][entry][:, 1:4] depositions = self.result['input_rescaled'][entry][:, 4] @@ -759,7 +765,7 @@ def get_particles(self, entry, only_primaries=True, out_particle_list = [] for entry in entries: - volume = entry % self._num_volumes if volume is not None else volume + volume = entry % self._num_volumes point_cloud = self.data_blob['input_data'][entry][:, 1:4] depositions = self.result['input_rescaled'][entry][:, 4] @@ -878,7 +884,7 @@ def get_interactions(self, entry, drop_nonprimary_particles=True, volume=None) - out_interaction_list = [] for e in entries: - volume = e % self._num_volumes if volume is not None else volume + volume = e % self._num_volumes if self.vb is not None else volume particles = self.get_particles(entry, only_primaries=drop_nonprimary_particles, volume=volume) out = group_particles_to_interactions_fn(particles) for ia in out: @@ -1118,7 +1124,7 @@ def get_true_fragments(self, entry, verbose=False, volume=None) -> List[TruthPar out_fragments_list = [] for entry in entries: - volume = entry % self._num_volumes if volume is not None else volume + volume = entry % self._num_volumes # Both are "adapted" labels labels = self.data_blob['cluster_label'][entry] @@ -1230,7 +1236,7 @@ def get_true_particles(self, entry, only_primaries=True, out_particles_list = [] global_entry = entry for entry in entries: - volume = entry % self._num_volumes if volume is not None else volume + volume = entry % self._num_volumes labels = self.data_blob['cluster_label'][entry] if self.deghosting: @@ -1373,7 +1379,7 @@ def get_true_interactions(self, entry, drop_nonprimary_particles=True, entries = self._get_entries(entry, volume) out_interactions_list = [] for e in entries: - volume = e % 
self._num_volumes if volume is not None else volume + volume = e % self._num_volumes if self.vb is not None else volume true_particles = self.get_true_particles(entry, only_primaries=drop_nonprimary_particles, volume=volume) out = group_particles_to_interactions_fn(true_particles, get_nu_id=True, mode='truth') @@ -1404,7 +1410,7 @@ def get_true_vertices(self, entry, volume=None): entries = self._get_entries(entry, volume) out = {} for entry in entries: - volume = entry % self._num_volumes if volume is not None else volume + volume = entry % self._num_volumes if self.vb is not None else volume inter_idxs = np.unique( self.data_blob['cluster_label'][entry][:, 7].astype(int)) for inter_idx in inter_idxs: @@ -1439,7 +1445,7 @@ def match_particles(self, entry, entries = self._get_entries(entry, volume) all_matches = [] for e in entries: - volume = e % self._num_volumes if volume is not None else volume + volume = e % self._num_volumes if self.vb is not None else volume if mode == 'pred_to_true': # Match each pred to one in true particles_from = self.get_particles(entry, only_primaries=only_primaries, volume=volume) @@ -1451,10 +1457,9 @@ def match_particles(self, entry, else: raise ValueError("Mode {} is not valid. For matching each"\ " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) + all_kwargs = {"min_overlap": self.min_overlap_count, "overlap_mode": self.overlap_mode, **kwargs} matched_pairs, _, _ = match_particles_fn(particles_from, particles_to, - min_overlap=self.min_overlap_count, - overlap_mode=self.overlap_mode, - **kwargs) + **all_kwargs) all_matches.extend(matched_pairs) return all_matches @@ -1485,7 +1490,7 @@ def match_interactions(self, entry, mode='pred_to_true', entries = self._get_entries(entry, volume) all_matches, all_counts = [], [] for e in entries: - volume = e % self._num_volumes if volume is not None else volume + volume = e % self._num_volumes if self.vb is not None else volume if mode == 'pred_to_true': ints_from = self.get_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) ints_to = self.get_true_interactions(entry, drop_nonprimary_particles=drop_nonprimary_particles, volume=volume) @@ -1496,9 +1501,9 @@ def match_interactions(self, entry, mode='pred_to_true', raise ValueError("Mode {} is not valid. 
For matching each"\ " prediction to truth, use 'pred_to_true' (and vice versa).".format(mode)) + all_kwargs = {"min_overlap": self.min_overlap_count, **kwargs} matched_interactions, _, counts = match_interactions_fn(ints_from, ints_to, - min_overlap=self.min_overlap_count, - **kwargs) + **all_kwargs) if match_particles: for interactions in matched_interactions: From 40ba7e0a52df91cb743cbdcda2d849d05d838647 Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 17 Nov 2022 15:56:57 -0800 Subject: [PATCH 51/52] Add option in standalone GrapPA to break individual input clusters into fragments --- mlreco/models/grappa.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mlreco/models/grappa.py b/mlreco/models/grappa.py index 1631debd..97f9c335 100644 --- a/mlreco/models/grappa.py +++ b/mlreco/models/grappa.py @@ -142,6 +142,7 @@ def __init__(self, cfg, name='grappa', batch_col=0, coords_col=(1, 4)): self.opt_dir_max_dist = self.dir_max_dist == 'optimize' self.add_local_dedxs = base_config.get('add_local_dedxs', False) self.dedx_max_dist = base_config.get('dedx_max_dist', 5) + self.break_clusters = base_config.get('break_clusters', False) self.shuffle_clusters = base_config.get('shuffle_clusters', False) # *Deprecated* but kept for backward compatibility: @@ -283,6 +284,15 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, self.node_min_size, self.source_col, cluster_classes=self.node_type) + if self.break_clusters: + from sklearn.cluster import DBSCAN + dbscan = DBSCAN(eps=1.1, min_samples=1, metric='chebyshev') + broken_clusts = [] + for c in clusts: + labels = dbscan.fit(cluster_data[c, self.coords_index[0]:self.coords_index[1]].detach().cpu().numpy()).labels_ + for l in np.unique(labels): + broken_clusts.append(c[labels==l]) + clusts = broken_clusts # If requested, shuffle the order in which the clusters are listed (used for debugging) if self.shuffle_clusters: @@ -311,7 +321,6 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, batch_ids = get_cluster_batch(cluster_data, clusts, batch_index=self.batch_index) clusts_split, cbids = split_clusts(clusts, batch_ids, batches, bcounts) - result['clusts'] = [clusts_split] # If necessary, compute the cluster distance matrix dist_mat = None @@ -348,12 +357,12 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, edge_index = restrict_graph(edge_index, dist_mat, self.edge_max_dist) else: # Here get_cluster_primary_label is used to ensure that Michel/Delta showers are given the appropriate semantic label - classes = extra_feats[:,-1].cpu().numpy().astype(int) if extra_feats is not None else get_cluster_primary_label(cluster_data, clusts, -1).astype(int) + if self.source_col == 5: classes = extra_feats[:,-1].cpu().numpy().astype(int) if extra_feats is not None else get_cluster_label(cluster_data, clusts, -1).astype(int) + if self.source_col == 6: classes = extra_feats[:,-1].cpu().numpy().astype(int) if extra_feats is not None else get_cluster_primary_label(cluster_data, clusts, -1).astype(int) edge_index = restrict_graph(edge_index, dist_mat, self.edge_max_dist, classes) # Update result with a list of edges for each batch id edge_index_split, ebids = split_edge_index(edge_index, batch_ids, batches) - result['edge_index'] = [edge_index_split] # Obtain node and edge features x = self.node_encoder(cluster_data, clusts) @@ -376,9 +385,9 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, 
else: x = torch.cat([x, dirs_start.float()], dim=1) if self.add_local_dedxs: - dedxs_start = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.dedx_max_dir) + dedxs_start = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,:3], clusts, self.dedx_max_dist) if self.add_local_dedxs != 'start': - dedxs_end = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,3:6], clusts, self.dedx_max_dir) + dedxs_end = get_cluster_dedxs(cluster_data[:, self.coords_index[0]:self.coords_index[1]], cluster_data[:,4], points[:,3:6], clusts, self.dedx_max_dist) x = torch.cat([x, dedxs_start.reshape(-1,1).float(), dedxs_end.reshape(-1,1).float()], dim=1) else: x = torch.cat([x, dedxs_start.reshape(-1,1).float()], dim=1) From c13cc8caa0d9ca684c4bb1f35cadd38884f6bbbf Mon Sep 17 00:00:00 2001 From: Francois Drielsma Date: Thu, 17 Nov 2022 16:05:03 -0800 Subject: [PATCH 52/52] Added option to restrict the maximum GrapPA input graph size (to deal with memory). Default: 2e6 edges --- mlreco/models/grappa.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mlreco/models/grappa.py b/mlreco/models/grappa.py index 97f9c335..b8fc5491 100644 --- a/mlreco/models/grappa.py +++ b/mlreco/models/grappa.py @@ -160,6 +160,7 @@ def __init__(self, cfg, name='grappa', batch_col=0, coords_col=(1, 4)): self.edge_max_dist = base_config.get('edge_max_dist', -1) self.edge_dist_metric = base_config.get('edge_dist_metric', 'voxel') self.edge_knn_k = base_config.get('edge_knn_k', 5) + self.edge_max_count = base_config.get('edge_max_count', 2e6) # Turn the edge_max_dist value into a matrix if not isinstance(self.edge_max_dist, list): self.edge_max_dist = [self.edge_max_dist] @@ -321,6 +322,11 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, batch_ids = get_cluster_batch(cluster_data, clusts, batch_index=self.batch_index) clusts_split, cbids = split_clusts(clusts, batch_ids, batches, bcounts) + result['clusts'] = [clusts_split] + if self.edge_max_count > -1: + _, cnts = np.unique(batch_ids, return_counts=True) + if np.sum([c*(c-1) for c in cnts]) > 2*self.edge_max_count: + return result # If necessary, compute the cluster distance matrix dist_mat = None @@ -363,6 +369,9 @@ def forward(self, data, clusts=None, groups=None, points=None, extra_feats=None, # Update result with a list of edges for each batch id edge_index_split, ebids = split_edge_index(edge_index, batch_ids, batches) + result['edge_index'] = [edge_index_split] + if edge_index.shape[1] > self.edge_max_count: + return result # Obtain node and edge features x = self.node_encoder(cluster_data, clusts)
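# Editor's illustration (not part of the patch): how the edge_max_count guard above estimates the
# size of a complete graph. Each image (batch id) with c clusters contributes c*(c-1) directed
# edges, so the total is the sum of c*(c-1) over batches. The example numbers are hypothetical.
import numpy as np

def dense_edge_count(batch_ids):
    """Number of directed edges if every cluster pair within a batch is connected."""
    _, cnts = np.unique(batch_ids, return_counts=True)
    return int(np.sum([c * (c - 1) for c in cnts]))

batch_ids = np.repeat([0, 1], [1000, 2000])   # two images with 1000 and 2000 clusters
n_edges = dense_edge_count(batch_ids)         # 1000*999 + 2000*1999 = 4,997,000
# The guard in the patch compares this count to 2*edge_max_count, so with the default
# edge_max_count of 2e6 this example (4,997,000 > 4,000,000) would trigger the early return,
# and the same cap is applied again to edge_index.shape[1] after the graph is restricted.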