Skip to content

Commit

Permalink
Merge pull request #85 from Temigo/temigo
Browse files Browse the repository at this point in the history
Update full chain model & other fixes
  • Loading branch information
Temigo authored Mar 4, 2021
2 parents b952d1c + a97b93b commit bfd1083
Show file tree
Hide file tree
Showing 35 changed files with 1,994 additions and 2,100 deletions.
587 changes: 0 additions & 587 deletions mlreco/ghost_chain_2.py

This file was deleted.

38 changes: 25 additions & 13 deletions mlreco/iotools/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class LArCVDataset(Dataset):
can be configured with arbitrary number of parser functions where each function can take arbitrary number of
LArCV event data objects. The assumption is that each data chunk respects the LArCV event boundary.
"""
def __init__(self, data_schema, data_keys, limit_num_files=0, limit_num_samples=0, event_list=None):
def __init__(self, data_schema, data_keys, limit_num_files=0, limit_num_samples=0, event_list=None, skip_event_list=None):
"""
Args: data_dirs ..... a list of data directories to find files (up to 10 files read from each dir)
data_schema ... a dictionary of string <=> list of strings. The key is a unique name of a data chunk in a batch.
Expand All @@ -24,6 +24,7 @@ def __init__(self, data_schema, data_keys, limit_num_files=0, limit_num_samples=
limit_num_files ... an integer limiting number of files to be taken per data directory
limit_num_samples ... an integer limiting number of samples to be taken per data
event_list ... a list of integers to specify which event (ttree index) to process
skip_event_list ... a list of integers to specify which events (ttree index) to skip
"""

# Create file list
Expand Down Expand Up @@ -87,6 +88,10 @@ def __init__(self, data_schema, data_keys, limit_num_files=0, limit_num_samples=
self._event_list=event_list[np.where(event_list < self._entries)]
self._entries = len(self._event_list)

if skip_event_list is not None:
self._event_list = self._event_list[~np.isin(self._event_list, skip_event_list)]
self._entries = len(self._event_list)

# Set total sample size
if limit_num_samples > 0 and self._entries > limit_num_samples:
self._entries = limit_num_samples
Expand All @@ -109,23 +114,30 @@ def list_data(f):
return data

@staticmethod
def create(cfg):
data_schema = cfg['schema']
data_keys = cfg['data_keys']
lnf = 0 if not 'limit_num_files' in cfg else int(cfg['limit_num_files'])
lns = 0 if not 'limit_num_samples' in cfg else int(cfg['limit_num_samples'])
event_list = None
if 'event_list' in cfg:
if os.path.isfile(cfg['event_list']):
event_list = [int(val) for val in open(cfg['event_list'],'r').read().replace(',',' ').split() if val.digit()]
def get_event_list(cfg, key):
    """Parse an event (ttree index) list from an `iotool.dataset` configuration.

    Args:
        cfg ... configuration dictionary
        key ... configuration key holding the list; the value may be either
                a path to a text file of integers (comma- and/or whitespace-
                separated) or a python-literal representation such as "[1, 2, 3]".
    Returns:
        a list of integers, or None if `key` is absent from `cfg`.
    Raises:
        ValueError if the value is neither a readable file nor a valid
        python literal.
    """
    event_list = None
    if key in cfg:
        if os.path.isfile(cfg[key]):
            # File of integers separated by commas and/or whitespace.
            # Note: the original used `val.digit()`, which does not exist on
            # str — `isdigit()` is the intended predicate. Use a context
            # manager so the file handle is always closed.
            with open(cfg[key], 'r') as f:
                event_list = [int(val) for val in f.read().replace(',', ' ').split() if val.isdigit()]
        else:
            try:
                import ast
                event_list = ast.literal_eval(cfg[key])
            except (SyntaxError, ValueError):
                # literal_eval raises SyntaxError for unparsable input and
                # ValueError for parsable-but-illegal literals; report the
                # offending value (the original printed `event_list`, which
                # is still None at this point).
                print('iotool.dataset.%s has invalid representation:' % key, cfg[key])
                raise ValueError
    return event_list

@staticmethod
def create(cfg):
    """Factory: build a LArCVDataset from an `iotool.dataset` configuration.

    Args:
        cfg ... dictionary with keys `schema`, `data_keys` and optionally
                `limit_num_files`, `limit_num_samples`, `event_list`,
                `skip_event_list`.
    Returns:
        a configured LArCVDataset instance.
    """
    data_schema = cfg['schema']
    data_keys = cfg['data_keys']
    lnf = int(cfg['limit_num_files']) if 'limit_num_files' in cfg else 0
    lns = int(cfg['limit_num_samples']) if 'limit_num_samples' in cfg else 0
    event_list = LArCVDataset.get_event_list(cfg, 'event_list')
    skip_event_list = LArCVDataset.get_event_list(cfg, 'skip_event_list')

    # Bug fix: `lns` was parsed but never forwarded, so `limit_num_samples`
    # in the configuration was silently ignored. The constructor already
    # accepts it (default 0), so passing it is backward-compatible.
    return LArCVDataset(data_schema=data_schema, data_keys=data_keys,
                        limit_num_files=lnf, limit_num_samples=lns,
                        event_list=event_list, skip_event_list=skip_event_list)

def data_keys(self):
return self._data_keys
Expand Down
93 changes: 51 additions & 42 deletions mlreco/iotools/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,20 @@ def parse_particle_asis(data):
A function to copy construct & return an array of larcv::Particle
Args:
length 1 array of larcv::EventParticle
(optional: larcv::EventClusterVoxel3D, to translate coordinates)
Return:
a python list of larcv::Particle object
"""
particles = data[0]
particles = [larcv.Particle(p) for p in data[0].as_vector()]

clusters = data[1]
assert particles.as_vector().size() in [clusters.as_vector().size(),clusters.as_vector().size()-1]
#assert data[0].as_vector().size() in [clusters.as_vector().size(),clusters.as_vector().size()-1]

meta = clusters.meta()

particles = [larcv.Particle(p) for p in data[0].as_vector()]
funcs = ["first_step","last_step","position","end_position"]

funcs = ["first_step","last_step","position","end_position","ancestor_position"]
for p in particles:
for f in funcs:
pos = getattr(p,f)()
Expand Down Expand Up @@ -464,6 +467,7 @@ def parse_cluster3d_full(data):
a function to retrieve clusters tensor
args:
length 2 array of larcv::EventClusterVoxel3D and larcv::EventParticle
(optional) array of larcv::EventParticle from `particle_mpv_tree`
return:
a numpy array with the shape (n,3) where 3 represents (x,y,z)
coordinate
Expand All @@ -475,12 +479,16 @@ def parse_cluster3d_full(data):
meta = cluster_event.meta()
num_clusters = cluster_event.as_vector().size()
clusters_voxels, clusters_features = [], []
particle_mpv = None
if len(data) > 2:
particle_mpv = data[2].as_vector()

from mlreco.utils.groups import get_valid_group_id, get_interaction_id, get_nu_id
from mlreco.utils.groups import get_valid_group_id, get_interaction_id, get_nu_id, get_particle_id
#group_ids = get_valid_group_id(cluster_event, particles_v)
group_ids = np.array([p.group_id() for p in particles_v])
inter_ids = get_interaction_id(particles_v)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids, particle_mpv=particle_mpv)
pids = get_particle_id(particles_v, nu_ids)

for i in range(num_clusters):
cluster = cluster_event.as_vector()[i]
Expand All @@ -499,13 +507,8 @@ def parse_cluster3d_full(data):
fill_value=group_ids[i], dtype=np.float32)
inter_id = np.full(shape=(cluster.as_vector().size()),
fill_value=inter_ids[i], dtype=np.float32)
t = int(particles_v[i].pdg_code())
if t in TYPE_LABELS.keys():
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=TYPE_LABELS[t], dtype=np.float32)
else:
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=-1, dtype=np.float32)
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=pids[i], dtype=np.float32)
nu_id = np.full(shape=(cluster.as_vector().size()),
fill_value=nu_ids[i], dtype=np.float32)
sem_type = np.full(shape=(cluster.as_vector().size()),
Expand All @@ -523,6 +526,7 @@ def parse_cluster3d_types(data):
a function to retrieve clusters tensor
args:
length 2 array of larcv::EventClusterVoxel3D and larcv::EventParticle
(optional) array of larcv::EventParticle from `particle_mpv_tree`
return:
a numpy array with the shape (n,3) where 3 represents (x,y,z)
coordinate
Expand All @@ -546,12 +550,15 @@ def parse_cluster3d_types(data):
meta = cluster_event.meta()
num_clusters = cluster_event.as_vector().size()
clusters_voxels, clusters_features = [], []
particle_mpv = None
if len(data) > 2:
particle_mpv = data[2].as_vector()

from mlreco.utils.groups import get_valid_group_id, get_interaction_id, get_nu_id
#group_ids = get_valid_group_id(cluster_event, particles_v)
group_ids = np.array([p.group_id() for p in particles_v])
inter_ids = get_interaction_id(particles_v)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids, particle_mpv = particle_mpv)

for i in range(num_clusters):
cluster = cluster_event.as_vector()[i]
Expand Down Expand Up @@ -590,35 +597,30 @@ def parse_cluster3d_kinematics(data):
a function to retrieve clusters tensor
args:
length 2 array of larcv::EventClusterVoxel3D and larcv::EventParticle
(optional) array of larcv::EventParticle from `particle_mpv_tree`
return:
a numpy array with the shape (n,3) where 3 represents (x,y,z)
coordinate
a numpy array with the shape (n,6) where 6 is voxel value,
cluster id, group id interaction id, nu id and semantic type, respectively
a numpy array with the shape (n,5) where 5 is voxel value,
cluster id, group id, pdg and momentum respectively
"""
cluster_event = data[0]
particles_v = data[1].as_vector()
TYPE_LABELS = {
22: 0, # photon
11: 1, # e-
-11: 1, # e+
13: 2, # mu-
-13: 2, # mu+
211: 3, # pi+
-211: 3, # pi-
2212: 4, # protons
}
# print(cluster_event)
# assert False
particles_v_asis = parse_particle_asis([data[1], data[0]])

meta = cluster_event.meta()
num_clusters = cluster_event.as_vector().size()
clusters_voxels, clusters_features = [], []
particle_mpv = None
if len(data) > 2:
particle_mpv = data[2].as_vector()

from mlreco.utils.groups import get_valid_group_id, get_interaction_id, get_nu_id
from mlreco.utils.groups import get_valid_group_id, get_interaction_id, get_nu_id, get_particle_id
#group_ids = get_valid_group_id(cluster_event, particles_v)
group_ids = np.array([p.group_id() for p in particles_v])
inter_ids = get_interaction_id(particles_v)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids)
nu_ids = get_nu_id(cluster_event, particles_v, inter_ids, particle_mpv = particle_mpv)
pids = get_particle_id(particles_v, nu_ids)

for i in range(num_clusters):
cluster = cluster_event.as_vector()[i]
Expand All @@ -635,21 +637,25 @@ def parse_cluster3d_kinematics(data):
group_id = np.full(shape=(cluster.as_vector().size()),
#fill_value=particles_v[i].group_id(), dtype=np.float32)
fill_value=group_ids[i], dtype=np.float32)
t = int(particles_v[i].pdg_code())
px = particles_v[i].px()
py = particles_v[i].py()
pz = particles_v[i].pz()
p = np.sqrt(px**2 + py**2 + pz**2) / 1000.0
p = np.full(shape=(cluster.as_vector().size()),
fill_value=p, dtype=np.float32)
if t in TYPE_LABELS.keys():
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=TYPE_LABELS[t], dtype=np.float32)
else:
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=-1, dtype=np.float32)
pdg = np.full(shape=(cluster.as_vector().size()),
fill_value=pids[i], dtype=np.float32)
vtx_x = np.full(shape=(cluster.as_vector().size()),
fill_value=particles_v_asis[i].ancestor_position().x(), dtype=np.float32)
vtx_y = np.full(shape=(cluster.as_vector().size()),
fill_value=particles_v_asis[i].ancestor_position().y(), dtype=np.float32)
vtx_z = np.full(shape=(cluster.as_vector().size()),
fill_value=particles_v_asis[i].ancestor_position().z(), dtype=np.float32)
is_primary = np.full(shape=(cluster.as_vector().size()),
fill_value=float((nu_ids[i] > 0) and (particles_v[i].parent_id() == particles_v[i].id()) and (particles_v[i].group_id() == particles_v[i].id())),
dtype=np.float32)
clusters_voxels.append(np.stack([x, y, z], axis=1))
clusters_features.append(np.column_stack([value, cluster_id, group_id, pdg, p]))
clusters_features.append(np.column_stack([value, cluster_id, group_id, pdg, p, vtx_x, vtx_y, vtx_z, is_primary]))
np_voxels = np.concatenate(clusters_voxels, axis=0)
np_features = np.concatenate(clusters_features, axis=0)
# mask = np_features[:, 6] == np.unique(np_features[:, 6])[0]
Expand All @@ -659,14 +665,17 @@ def parse_cluster3d_kinematics(data):


def parse_cluster3d_kinematics_clean(data):
    """
    Similar to parse_cluster3d_kinematics, but removes overlap voxels.
    Additional input necessary: larcv::EventSparseTensor3D (put it last)
    """
    grp_voxels, grp_data = parse_cluster3d_kinematics(data)
    _, cluster_data = parse_cluster3d_full(data)
    # The reference sparse tensor is expected as the *last* data product
    # (merge artifact fix: a superseded `[data[2]]` call and an early
    # `return grp_data[:, :-1]` — which made the intended return
    # unreachable — have been removed).
    img_voxels, img_data = parse_sparse3d_scn([data[-1]])

    # Append the semantic-type column from the full parser so clean_data can
    # use it, then return the cleaned tensors with that column kept.
    grp_data = np.concatenate([grp_data, cluster_data[:, -1][:, None]], axis=1)
    grp_voxels, grp_data = clean_data(grp_voxels, grp_data, img_voxels, img_data, data[0].meta())
    return grp_voxels, grp_data


def parse_cluster3d_full_fragment(data):
Expand Down Expand Up @@ -841,8 +850,8 @@ def parse_cluster3d_clean_full(data):
coordinate
a numpy array with the shape (N,4) where 4 represens (value, cluster_id, group_id, sem_type)
"""
grp_voxels, grp_data = parse_cluster3d_full(data)
img_voxels, img_data = parse_sparse3d_scn([data[2]])
grp_voxels, grp_data = parse_cluster3d_full(data[:-1])
img_voxels, img_data = parse_sparse3d_scn([data[-1]])

grp_voxels, grp_data = clean_data(grp_voxels, grp_data, img_voxels, img_data, data[0].meta())

Expand Down
3 changes: 3 additions & 0 deletions mlreco/main_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ def train_loop(handlers):
# Train step
data_blob, result_blob = handlers.trainer.train_step(handlers.data_io_iter)

# result_blob['total_num_points'] = [data_blob['input_data'][0].shape[0]]
# result_blob['total_nonghost_points'] = [(data_blob['segment_label'][0][:, -1] < 5).sum().item()]

# Save snapshot
if checkpt_step:
handlers.trainer.save_state(handlers.iteration)
Expand Down
Binary file removed mlreco/models/.factories.py.swp
Binary file not shown.
1 change: 0 additions & 1 deletion mlreco/models/cluster_cnn/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def cluster_model_construct(name):

def spice_loss_construct(name):
    """Look up and return the clustering (SPICE) loss registered under `name`.

    Args:
        name ... string key into the loss-function registry.
    Raises:
        Exception if `name` is not a registered loss function.
    """
    # Leftover debug `print(name)` removed.
    loss_fns = spice_loss_dict()
    if name not in loss_fns:
        raise Exception("Unknown clustering loss function name provided")
    return loss_fns[name]
2 changes: 1 addition & 1 deletion mlreco/models/cluster_cnn/losses/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def regularization_loss(cluster_means):
def margin_smoothing_loss(sigma, sigma_means, labels, margin=0):
    """Smoothed, margin-hinged L1 loss between per-point sigma values and
    their cluster means.

    Args:
        sigma ......... (N,) per-point sigma predictions
        sigma_means ... (C,) per-cluster mean sigma values
        labels ........ (N,) integer cluster assignment per point
        margin ........ deviations below this are not penalized
    Returns:
        scalar torch loss, averaged first within then across clusters.
    """
    x = sigma[:, None]
    mu = sigma_means[None, :]
    # Hinge at `margin`, then smooth with sqrt(.^2 + eps) for stable
    # gradients near zero (pseudo-Huber style). Merge artifact fix: a dead
    # `l = torch.clamp(...)**2` assignment, immediately overwritten by this
    # line, has been removed.
    l = torch.sqrt(torch.clamp(torch.abs(x - mu) - margin, min=0)**2 + 1e-6)
    # Pick, for each point, the distance to its own cluster's mean.
    l = torch.gather(l, 1, labels.view(-1, 1)).squeeze()
    # scatter_mean averages within clusters; mean() averages across them.
    loss = torch.mean(scatter_mean(l, labels))
    return loss
Expand Down
19 changes: 10 additions & 9 deletions mlreco/models/cluster_cnn/losses/spatial_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .lovasz import mean, lovasz_hinge_flat, StableBCELoss, iou_binary
from .misc import *
from collections import defaultdict

from pprint import pprint

class MaskBCELoss(nn.Module):
'''
Expand Down Expand Up @@ -186,8 +186,6 @@ def forward(self, out, segment_label, group_label):
res.update(loss_avg)
res.update(acc_avg)

print(acc_avg)

return res


Expand Down Expand Up @@ -424,16 +422,18 @@ def get_per_class_probabilities(self, embeddings, margins, labels):
for i, c in enumerate(cluster_labels):
index = (labels == c)
mask = torch.zeros(embeddings.shape[0]).to(device)
mask[index] = 1
mask[~index] = 0
mask[index] = 1.0
mask[~index] = 0.0
sigma = torch.mean(margins[index], dim=0)
dists = torch.sum(torch.pow(embeddings - centroids[i], 2), dim=1)
p = torch.exp(-dists / (2 * torch.pow(sigma, 2) + 1e-8))
p = torch.clamp(torch.exp(-dists / (2 * torch.pow(sigma, 2))), min=0, max=1)
logits = logit_fn(p, eps=1e-6)
# print(logits.shape)
probs[index] = p[index]
loss += lovasz_hinge_flat(2 * p - 1, mask)
loss += lovasz_hinge_flat(logits, mask).mean()
accuracy += float(iou_binary(p > 0.5, mask, per_image=False))
sigma_detach = sigma.detach()
smoothing_loss += torch.mean(torch.norm(margins[index] - sigma_detach, dim=1))
smoothing_loss += torch.mean(torch.norm(margins[index].view(-1, 1) - sigma_detach, dim=1))

loss /= n_clusters
smoothing_loss /= n_clusters
Expand Down Expand Up @@ -469,10 +469,11 @@ def combine_multiclass(self, embeddings, margins, seediness, slabels, clabels):
if int(sc) == 4:
continue
index = (slabels == sc)
clabels_unique, _ = unique_label_torch(clabels[index])
mask_loss, smoothing_loss, inter_loss, probs, acc = \
self.get_per_class_probabilities(
embeddings[index], margins[index],
clabels[index])
clabels_unique)
prob_truth = probs.detach()
seed_loss = self.l2loss(prob_truth, seediness[index].squeeze(1))
total_loss = self.embedding_weight * mask_loss \
Expand Down
Loading

0 comments on commit bfd1083

Please sign in to comment.