Add more shape properties, update classification and interaction pair…

…ing (#510) * Add waveform shape properties * Add tight coincidence property following suggestion of @marcschumann * Classification based on new properties * Order S1s by (tight_coincidene, area) for pairing
XENON1T · Feb 17, 2017 · 4e0c9dd · 4e0c9dd
1 parent ed81a7c
commit 4e0c9dd
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 110 deletions.
diff --git a/pax/config/_base.ini b/pax/config/_base.ini
@@ -82,6 +82,13 @@ timeout_after_sec = 300
 # it isn't (and shouldn't) be used for anything else
 subtract_reference_baseline_only_for_raw_waveform = False
 
+
+[BasicProperties.SumWaveformProperties]
+# Window within which hit maxima can count towards the tight coincidence level.
+# Must be an odd number of digitizer samples (otherwise will be slightly wider than you specify)
+tight_coincidence_window = 50 * ns
+
+
 [BuildInteractions.BasicInteractionProperties]
 # Statistic to use for the S1 pattern goodness of fit: same options as for PosRecTopPatternFit
 s1_pattern_statistic = 'likelihood_poisson'

diff --git a/pax/datastructure.py b/pax/datastructure.py
@@ -7,6 +7,7 @@
 names of functionality between major releases.  You may add variables in minor
 releases.  Patch releases cannot modify this.
 """
+import operator
 from collections import namedtuple
 import numpy as np
 import six
@@ -259,7 +260,7 @@ def saturated_channels(self):
     largest_hit_area = float('nan')
 
     # Channel of the largest hit in the peak
-    largest_hit_channel = 0
+    largest_hit_channel = INT_NAN
 
     @property
     def does_channel_contribute(self):
@@ -271,6 +272,10 @@ def contributing_channels(self):
         """List of channels which contribute one or more hits to this peak"""
         return np.where(self.does_channel_contribute)[0]
 
+    #: Number of channels that have a hit maximum within a short (configurable) window around the peak's sum
+    #: waveform maximum.
+    tight_coincidence = INT_NAN
+
     ##
     # Time distribution information
     ##
@@ -290,6 +295,11 @@ def contributing_channels(self):
     #: First element (0) is always zero, last element (10) is the full range of the peak.
     range_area_decile = np.zeros(11, dtype=np.float)
 
+    #: Time (ns) from the area decile point to the area midpoint.
+    #: If you want to know the time until some other point (say the sum waveform maximum),
+    #: just add the difference between that point and the area midpoint.
+    area_decile_from_midpoint = np.zeros(11, dtype=np.float)
+
     @property
     def range_50p_area(self):
         return self.range_area_decile[5]
@@ -719,7 +729,7 @@ def length(self):
         """
         return int(self.duration() / self.sample_duration)
 
-    def s1s(self, detector='tpc', sort_key='area', reverse=True):  # noqa
+    def s1s(self, detector='tpc', sort_key=('tight_coincidence', 'area'), reverse=True):  # noqa
         """List of S1 (scintillation) signals in this event
         In the ROOT class output, this returns a list of integer indices in event.peaks
         Inside pax, returns a list of :class:`pax.datastructure.Peak` objects
@@ -792,8 +802,10 @@ def get_peaks_by_type(self, desired_type='all', detector='tpc', sort_key='area',
             peaks.append(peak)
 
         # Sort the peaks by your sort key
+        if isinstance(sort_key, (str, bytes)):
+            sort_key = [sort_key]
         peaks = sorted(peaks,
-                       key=lambda x: getattr(x, sort_key),
+                       key=operator.attrgetter(*sort_key),
                        reverse=reverse)
 
         return peaks

diff --git a/pax/plugins/interaction_processing/BuildInteractions.py b/pax/plugins/interaction_processing/BuildInteractions.py
@@ -10,7 +10,8 @@ class BuildInteractions(plugin.TransformPlugin):
       - The S2 occurs after the S1
       - The S2 is larger than s2_pairing_threshold (avoids single electrons)
     Mo more than pair_n_s2s S2s and pair_n_s1s S1s will be paired with each other
-    Pairing will start from the largest S1 and S2, then move down S2s in area and eventually down S1s in area
+    Pairing will start from the S1 with largest tight_coincidence and the largest S2,
+    then move down S2s in tight_coincidence and eventually down S1s in area.
 
     xy_posrec_preference = ['algo1', 'algo2', ...]
     """

diff --git a/pax/plugins/peak_processing/BasicProperties.py b/pax/plugins/peak_processing/BasicProperties.py
@@ -54,12 +54,14 @@ class SumWaveformProperties(plugin.TransformPlugin):
     """Computes properties based on the hits-only sum waveform"""
 
     def startup(self):
-        self.wv_field_len = int(self.config['peak_waveform_length'] / self.config['sample_duration']) + 1
+        self.dt = dt = self.config['sample_duration']
+        self.wv_field_len = int(self.config['peak_waveform_length'] / dt) + 1
+        self.tight_coincidence_samples = self.config['tight_coincidence_window'] // dt
         if not self.wv_field_len % 2:
             raise ValueError('peak_waveform_length must be an even multiple of the sample size')
 
     def transform_event(self, event):
-        dt = event.sample_duration
+        dt = self.dt
         field_length = self.wv_field_len
         for peak in event.peaks:
             peak.sum_waveform = np.zeros(field_length, dtype=peak.sum_waveform.dtype)
@@ -100,10 +102,22 @@ def transform_event(self, event):
             # Amplitude at the maximum
             peak.height = w[max_idx]
 
-            # Compute fraction of area in central deciles
-            peak.area_midpoint, peak.range_area_decile = compute_area_deciles(w)
-            peak.range_area_decile *= dt
-            peak.area_midpoint += peak.left * dt
+            # Compute area decile points
+            # TODO: reactivate tests after refactor
+            area_times = np.ones(21) * float('nan')
+            integrate_until_fraction(w, fractions_desired=np.linspace(0, 1, 21), results=area_times)
+            area_times *= dt
+            area_midpoint = area_times[10]
+
+            # Store widths and rise times
+            peak.range_area_decile = area_times[10:] - area_times[10::-1]
+            peak.area_decile_from_midpoint = area_times[::2] - area_midpoint
+
+            # Compute a tight coincidence count (useful for distinguishing S1s from junk)
+            x = peak.hits['index_of_maximum']
+            l = peak.index_of_maximum - self.tight_coincidence_samples
+            r = peak.index_of_maximum + self.tight_coincidence_samples
+            peak.tight_coincidence = len(np.unique(peak.hits['channel'][(x >= l) & (x <= r)]))
 
             # Store the waveform; for tpc also store the top waveform
             put_w_in_center_of_field(w, peak.sum_waveform, cog_idx)
@@ -125,18 +139,6 @@ def transform_event(self, event):
         return event
 
 
-def compute_area_deciles(w):
-    """Return (index of mid area, array of the 0th ... 10 th area decile ranges in samples) of w
-    e.g. range_area_decile[5] = range of 50% area = distance (in samples)
-    between point of 25% area and 75% area (with boundary samples added fractionally).
-    First element (0) of array is always zero, last element (10) is the length of w in samples.
-    """
-    fractions_desired = np.linspace(0, 1, 21)
-    index_of_area_fraction = np.ones(len(fractions_desired)) * float('nan')
-    integrate_until_fraction(w, fractions_desired, index_of_area_fraction)
-    return index_of_area_fraction[10], (index_of_area_fraction[10:] - index_of_area_fraction[10::-1]),
-
-
 # @numba.jit(numba.void(numba.float32[:], numba.float64[:], numba.float64[:]),
 #            nopython=True, cache=True)
 # For some reason numba doesn't clean up its memory properly for this function... leave it in python for now

diff --git a/pax/plugins/peak_processing/ClassifyPeaks.py b/pax/plugins/peak_processing/ClassifyPeaks.py
@@ -1,93 +1,31 @@
 from pax import plugin, units
-import numpy as np
 from scipy import interpolate
 
 
 class AdHocClassification1T(plugin.TransformPlugin):
 
-    def transform_event(self, event):
-
-        if self.config.get('revert_to_old'):
-            ##
-            # OLD CLASSIFICATION, will be deleted soonish
-            ##
-            for peak in event.peaks:
-                # Don't work on noise and lone_hit
-                if peak.type in ('noise', 'lone_hit'):
-                    continue
-
-                area = peak.area
-                width = peak.range_area_decile[5]
-
-                if width > 0.1 * area**3:
-                    peak.type = 'unknown'
-
-                elif width < 50:
-                    peak.type = 's1'
-
-                else:
-                    if width > 3.5e2 / area**0.1 or width > 1.5 * area:
-                        peak.type = 's2'
-                    else:
-                        peak.type = 's1'
-
-        else:
-            ##
-            # NEW CLASSIFICATION
-            ##
-            min_s2_aft = 0.3
-
-            x_s1 = np.array([0, 40,  40 + 1e-9,  70, 120, 121])
-            y_s1 = np.array([1,  1,  0.7,      0.3, 0,     0])
-            s1_classification_bound = interpolate.interp1d(x_s1, y_s1, fill_value='extrapolate')
-            s2_classification_bound = interpolate.interp1d(x_s1 + 20,
-                                                           np.clip(y_s1, min_s2_aft, 1),
-                                                           fill_value='extrapolate')
-
-            # Sort the peaks by left boundary, so we can keep track of the largest peak seen so far
-            event.peaks = sorted(event.peaks, key=lambda x: x.left)
+    def startup(self):
+        self.s1_rise_time_bound = interpolate.interp1d([0, 10, 25, 100],
+                                                       [100, 100, 70, 70],
+                                                       fill_value='extrapolate', kind='linear')
 
-            largest_peak_area_seen = 0
-
-            for peak in event.peaks:
-                # Don't work on noise and lone_hit
-                if peak.type in ('noise', 'lone_hit'):
-                    continue
-
-                # Peaks with a low coincidence level are labeled 'unknown' immediately
-                if peak.n_contributing_channels <= 3:
-                    peak.type = 'unknown'
-                    continue
-
-                area = peak.area
-                width = peak.range_area_decile[5]
-                aft = peak.area_fraction_top
+    def transform_event(self, event):
 
-                if area < 50:
-                    # Decide on S1/S2 based on area fraction top and width.
-                    # For widths < 40 ns, always S1; > 120 ns, always S2. In between it depends also on aft.
-                    if aft < s1_classification_bound(width):
-                        if largest_peak_area_seen > 5000:
-                            # We're already in the tail of a large peak. If we see an S1-like peak here, it is most
-                            # likely a (fragment of) a small single electron
-                            peak.type = 'unknown'
-                        else:
-                            peak.type = 's1'
-                    elif aft > s2_classification_bound(width):
-                        peak.type = 's2'
-                    else:
-                        peak.type = 'unknown'
+        for peak in event.peaks:
+            # Don't work on noise and lone_hit
+            if peak.type in ('noise', 'lone_hit'):
+                continue
 
-                else:
-                    if width < 70 or (width < 3.5e2 / area**0.1 and width < 1.5 * area):
-                        peak.type = 's1'
-                    else:
-                        if aft < min_s2_aft:
-                            peak.type == 'unknown'
-                        else:
-                            peak.type = 's2'
+            # Peaks with a low coincidence level are labeled 'unknown' immediately
+            if peak.tight_coincidence <= 2:
+                peak.type = 'unknown'
+                continue
 
-                largest_peak_area_seen = max(largest_peak_area_seen, area)
+            # Peaks that rise fast are S1s, the rest are S2s
+            if -peak.area_decile_from_midpoint[1] > self.s1_rise_time_bound(peak.area):
+                peak.type = 's1'
+            else:
+                peak.type = 's2'
 
         return event
 

diff --git a/tests/test_peak_properties.py b/tests/test_peak_properties.py
@@ -3,8 +3,7 @@
 import numpy as np
 from numpy import testing as np_testing
 
-from pax.plugins.peak_processing.BasicProperties import integrate_until_fraction, \
-    put_w_in_center_of_field, compute_area_deciles
+from pax.plugins.peak_processing.BasicProperties import integrate_until_fraction, put_w_in_center_of_field
 
 
 class TestPeakProperties(unittest.TestCase):
@@ -59,12 +58,6 @@ def test_store_waveform(self):
         put_w_in_center_of_field(np.ones(20), field, 10)
         np_testing.assert_equal(field, np.array([1, 1, 1, 1, 1]))
 
-    def test_area_deciles(self):
-        w = np.ones(100, dtype=np.float32)
-        midpoint, deciles = compute_area_deciles(w)
-        self.assertAlmostEqual(midpoint, 50, places=4)
-        np_testing.assert_almost_equal(deciles, np.linspace(0, 100, 11), decimal=4)
-
 
 if __name__ == '__main__':
     unittest.main()