diff --git a/dist/gdas-0.2.9.tar.gz b/dist/gdas-0.2.9.tar.gz deleted file mode 100644 index b78f354..0000000 Binary files a/dist/gdas-0.2.9.tar.gz and /dev/null differ diff --git a/docs/_modules/gdas/epower.html b/docs/_modules/gdas/epower.html index da36dc6..debfce7 100644 --- a/docs/_modules/gdas/epower.html +++ b/docs/_modules/gdas/epower.html @@ -93,8 +93,6 @@

Source code for gdas.epower

     #print strain.insert_strain_option_group.__dict__
     #print psd.insert_psd_option_group.__dict__
     sample_rate = ts_data.sample_rate
-    print sample_rate
-    quit()
     nchans,band,flow = check_filtering_settings(sample_rate,nchans,band,fmin,fmax)
     seg_len,fd_psd,lal_psd = calculate_psd(ts_data,sample_rate,psd_segment_length,psd_segment_stride,psd_estimation)
     window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction)
@@ -460,7 +458,7 @@ 

Source code for gdas.epower

     s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum()
     s_j_nb_avg *= delta_f
     s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \
-        numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij)
+                   numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij)
     # eqn. 62
     uw_ups_ratio = s_j_nb_avg / s_j_nb_denom
     # eqn. 63 -- approximation of unwhitened signal energy time series
diff --git a/docs/_modules/gdas/plots.html b/docs/_modules/gdas/plots.html
index 97dd0ce..1e20301 100644
--- a/docs/_modules/gdas/plots.html
+++ b/docs/_modules/gdas/plots.html
@@ -44,7 +44,7 @@ 

gdas.plots

Source code for gdas.plots

-import matplotlib,numpy
+import matplotlib,numpy,mlpy
 matplotlib.use('Agg')
 from astropy.units        import Quantity
 from matplotlib           import pyplot
@@ -54,6 +54,8 @@ 

Source code for gdas.plots

 from gwpy.spectrogram     import Spectrogram
 from gwpy.table.lsctables import SnglBurstTable
 from gwpy.timeseries      import TimeSeries
+from pylab                import *
+from scipy                import signal
 
 
[docs]def plot_activity(full_seglist): """ @@ -254,6 +256,51 @@

Source code for gdas.plots

         cnt += tmp
     plot_spectrogram(dof_tiles.T,fname='%s/tf_%ichans_%02idof.png'%(segfolder,nc_sum+1,2*j))
     plot.savefig("%s/bands.png"%(segfolder))
+
+def wavelet(ts_data):
+    z = numpy.array([float(i) for i in ts_data])
+    t = numpy.array([float(i) for i in ts_data.sample_times])
+    # Decimate z in three stages: overall factor 500 (5*10*10) when sample_rate is 500, otherwise 512 (8*8*8)
+    rate = [5,10,10] if ts_data.sample_rate==500 else [8,8,8]
+    for i in rate:
+        z = signal.decimate(z,i,zero_phase=True)
+    # Keep one timestamp out of every sample_rate samples
+    t = [t[n*ts_data.sample_rate] for n in range(len(t)/ts_data.sample_rate)]
+    # Express times as offsets from the first kept timestamp, divided by s = 60 (the x-axis below is labelled in minutes)
+    s = 60.
+    t = [(t[i]-t[0])/s for i in range(len(t))]
+    # Morlet (omega0 = 6) continuous wavelet transform via mlpy with dt=1, then drop entries whose freq is below 0.1 or above 1000; NOTE(review): freq is built from scales[1:] while spec keeps every scale -- confirm the row/frequency alignment
+    omega0 = 6
+    fct    = "morlet"
+    scales = mlpy.wavelet.autoscales(N=len(z),dt=1,dj=0.05,wf=fct,p=omega0)
+    spec   = mlpy.wavelet.cwt(z,dt=1,scales=scales,wf=fct,p=omega0)
+    freq   = (omega0 + numpy.sqrt(2.0 + omega0 ** 2)) / (4 * numpy.pi * scales[1:]) * 1000
+    idxs   = numpy.where(numpy.logical_or(freq<0.1,1000<freq))[0]
+    spec   = numpy.delete(spec,idxs,0)
+    freq   = numpy.delete(freq,idxs,0)
+    # Create the figure with three axes: ax1 (top), ax2 (bottom, sharing x with ax1) and ax3 (narrow, used for the colorbar)
+    fig = figure(figsize=(12,8))
+    plt.subplots_adjust(left=0.1, right=1, bottom=0.1, top=0.94, hspace=0, wspace=0)
+    ax1 = fig.add_axes([0.10,0.75,0.70,0.20])
+    ax2 = fig.add_axes([0.10,0.10,0.70,0.60], sharex=ax1)
+    ax3 = fig.add_axes([0.83,0.10,0.03,0.60])
+    # Plot |z| minus its average against t
+    ax1.plot(t,abs(z)-numpy.average(abs(z)),'k')
+    ax1.set_ylabel('Magnetic Fields [uT]')
+    # Add a log-scaled twin y-axis over ax2 spanning freq[-1]..freq[0]; its ticks and labels are drawn on the left
+    twin_ax = ax2.twinx()
+    twin_ax.set_yscale('log')
+    twin_ax.set_xlim(t[0], t[-1])
+    twin_ax.set_ylim(freq[-1], freq[0])
+    twin_ax.tick_params(which='both', labelleft=True, left=True, labelright=False)
+    # Show |spec|**2 as an image with a logarithmic colour normalisation
+    img = ax2.imshow(numpy.abs(spec)**2,extent=[t[0],t[-1],freq[-1],freq[0]],
+                     aspect='auto',interpolation='nearest',cmap=cm.jet,norm=mpl.colors.LogNorm()) # cm.cubehelix
+    ax2.tick_params(which='both', labelleft=False, left=False)
+    ax2.set_xlabel('Time [mins]')
+    ax2.set_ylabel('Frequency [mHz]',labelpad=50)
+    fig.colorbar(img, cax=ax3)
+    plt.savefig('wavelet.png')
 
diff --git a/docs/_modules/gdas/retrieve.html b/docs/_modules/gdas/retrieve.html index 1732a48..efd67a5 100644 --- a/docs/_modules/gdas/retrieve.html +++ b/docs/_modules/gdas/retrieve.html @@ -46,7 +46,7 @@

gdas.retrieve

Source code for gdas.retrieve

 """ Retrieving magnetic field data.""" 
 
-import os,glob,h5py,astropy,numpy,astropy
+import os,glob,h5py,astropy,numpy,astropy,scipy
 from astropy.time    import Time
 from datetime        import datetime,timedelta
 from glue.segments   import segment,segmentlist
@@ -54,7 +54,7 @@ 

Source code for gdas.retrieve

 from gwpy.timeseries import TimeSeries,TimeSeriesList
 from pycbc           import types
 
-
[docs]def magfield(station,starttime,endtime,activity=False,rep='/GNOMEDrive/gnome/serverdata/'): +
[docs]def magfield(station,starttime,endtime,activity=False,rep='/GNOMEDrive/gnome/serverdata/',resample=None): """ Glob all files withing user-defined period and extract data. @@ -107,8 +107,11 @@

Source code for gdas.retrieve

     ts_list = generate_timeseries(file_order,setname)
     # Retrieve channel data for all the segments
     full_data = numpy.hstack([retrieve_channel_data(data_order[seg],setname) for seg in seglist])
+    new_sample_rate = sample_rate if resample==None else resample
+    new_data_length = len(full_data)/float(sample_rate)*new_sample_rate
+    full_data = scipy.signal.resample(full_data,int(new_data_length))
     # Models a time series consisting of uniformly sampled scalar values
-    ts_data = types.TimeSeries(full_data,delta_t=1/sample_rate,epoch=seglist[0][0])
+    ts_data = types.TimeSeries(full_data,delta_t=1./new_sample_rate,epoch=seglist[0][0])
     for v in data_order.values():
         v.close()        
     return ts_data,ts_list,activity
@@ -226,7 +229,6 @@

Source code for gdas.retrieve

     sample_rate = dset.attrs["SamplingRate(Hz)"]
     gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"])
     data = retrieve_channel_data(hfile, setname)
-    print sample_rate,gps_epoch,dset.attrs["Date"],dset.attrs["t0"],dset.attrs["t1"]
     ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch)
     return ts_data
diff --git a/index.rst b/docs/_sources/backup.rst.txt similarity index 99% rename from index.rst rename to docs/_sources/backup.rst.txt index c99a194..85ce8b7 100644 --- a/index.rst +++ b/docs/_sources/backup.rst.txt @@ -1,6 +1,16 @@ GNOME Data Analysis Software ============================ +* :ref:`test2` + + .. _test2: + +.. toctree:: + :maxdepth: 2 + :caption: People & By-laws + + test + .. toctree:: :maxdepth: 2 @@ -466,7 +476,7 @@ We present below a step-by-step procedure followed during the Excess Power searc - :ref:`Exploring tiles with different duration ` - For each given tile's bandwidth, one can investigate different tile's duration. This can be done by exploring different number of degrees of freedom, :math:`d$, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the :math:`d$, one can explore multiple tile's duration for different bandwidth. + For each given tile's bandwidth, one can investigate different tile's duration. This can be done by exploring different number of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the :math:`d`, one can explore multiple tile's duration for different bandwidth. - :ref:`Define triggering signal ` @@ -516,7 +526,7 @@ One can display the power measurements, frequency array and frequency between co print 'Display the frequency separation between bins' print fd_psd.delta_f -$\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. highest period) of detectable signals in each segment. 
The frequency range spans from 0 to the Nyquist frequency, i.e. half de the sampling rate. +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. highest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half de the sampling rate. Checking filtering settings --------------------------- @@ -777,7 +787,7 @@ The undersampling rate for this tile can be calculated using the channel frequen Explore multiple tile durations ------------------------------- -Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the =--max-duration= argument. If not, the value will be set to 32. :: +Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the ``--max-duration`` argument. If not, the value will be set to 32. :: if args.max_duration is not None: max_dof = 2 * args.max_duration * (band * (nc_sum+1)) diff --git a/docs/_sources/epower_overview.rst.txt b/docs/_sources/epower_overview.rst.txt new file mode 100644 index 0000000..8c24321 --- /dev/null +++ b/docs/_sources/epower_overview.rst.txt @@ -0,0 +1,47 @@ +Excess Power - Overview +======================= + +The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search of Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (200) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. 
(2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. + +We present below a step-by-step procedure followed during the Excess Power search analysis. For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. + +- :ref:`Time domain segmentation and PSD estimate ` + + We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. + +- :ref:`Comb of frequency channels ` + + We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. + +- :ref:`Creating analysing blocks ` + + The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for PSD estimate to define the duration of each block. 
For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel. + +- :ref:`Creating tiles with different bandwidth ` + + We can now construct tiles with different bandwidth by summing multiple channels together. + +- :ref:`Exploring tiles with different duration ` + + For each given tile's bandwidth, one can investigate different tile's duration. This can be done by exploring different number of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the :math:`d`, one can explore multiple tile's duration for different bandwidth. + +- :ref:`Define triggering signal ` + + The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. + +.. figure:: ./img/overview.png + + Overview of the Excess Power method and difference between segments, channels, tiles and blocks. + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. 
autosummary:: + :toctree: generated/ + + excess_power + + + diff --git a/docs/_sources/epower_step1_checkfilt.rst.txt b/docs/_sources/epower_step1_checkfilt.rst.txt new file mode 100644 index 0000000..3e0163f --- /dev/null +++ b/docs/_sources/epower_step1_checkfilt.rst.txt @@ -0,0 +1,50 @@ +Excess Power - Step 1: Checking filtering settings +================================================== + +The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: + + if args.min_frequency < args.strain_high_pass: + print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) + +In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as a signal at a higher frequency would not be easily identifiable. 
:: + + if args.max_frequency is None: + args.max_frequency = args.sample_rate / 2.0 + +If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args `) or the number of frequency channels (``--channels`` argument) is not defined but the total spectral band is (``data_band``), one can then determine all the filter settings as follows: :: + + + if args.tile_bandwidth is None and args.channels is None: + # Exit program with error message + exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") + else: + # Define as assert statement that tile maximum frequency larger than its minimum frequency + assert args.max_frequency >= args.min_frequency + # Define spectral band of data + data_band = args.max_frequency - args.min_frequency + # Check if tile bandwidth or channel is defined + if args.tile_bandwidth is not None: + # Define number of possible filter bands + nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 + elif args.channels is not None: + # Define filter bandwidth + band = args.tile_bandwidth = data_band / (args.channels + 1) + assert args.channels > 1 + +The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: + + # Lowest frequency of the first filter + flow = args.min_frequency + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + check_filtering_settings + + + diff --git a/docs/_sources/epower_step2_psd.rst.txt b/docs/_sources/epower_step2_psd.rst.txt new file mode 100644 index 0000000..3960f7e --- /dev/null +++ b/docs/_sources/epower_step2_psd.rst.txt @@ -0,0 +1,54 @@ +.. 
_psdestimate: + +Excess Power - Step 2: Estimate Power Spectral Density (PSD) +============================================================ + +The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values. :: + + # Convert time series as array of float + data = ts_data.astype(numpy.float64) + +The PSD is calculated by splitting up the signal into overlapping segments and scan through each segment to calculate individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: + + # Average method to measure PSD from the data + avg_method = args.psd_estimation + +One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in second units with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into sample unit. :: + + # The segment length for PSD estimation in samples + seg_len = int(args.psd_segment_length * args.sample_rate) + # The separation between consecutive segments in samples + seg_stride = int(args.psd_segment_stride * args.sample_rate) + +We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library. 
What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, to give the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin. :: + + # Lifted from the psd.from_cli module + fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) + # Plot the power spectral density + plot_spectrum(fd_psd) + # We need this for the SWIG functions + lal_psd = fd_psd.lal() + +One can display the power measurements, frequency array and frequency between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: + + print 'Display power measurements of the first 10 frequency bins' + print fd_psd[:10] + print 'Display central frequency of the first 10 bins' + print fd_psd.sample_frequencies[:10] + print 'Display the frequency separation between bins' + print fd_psd.delta_f + +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate. + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + calculate_psd + + + diff --git a/docs/_sources/epower_step3_speccor.rst.txt b/docs/_sources/epower_step3_speccor.rst.txt new file mode 100644 index 0000000..d9ab49f --- /dev/null +++ b/docs/_sources/epower_step3_speccor.rst.txt @@ -0,0 +1,25 @@ +Excess Power - Step 3: Two point spectral correlation +===================================================== + +This part determines how much data on either side of the tukey window is to be discarded. 
Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8, smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end). :: + + window_fraction = 0 + +The two point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: + + # Do two point spectral correlation + window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) + window = window.data.data + window_sigma_sq = numpy.mean(window**2) + # Pre scale the window by its root mean squared -- see eqn 11 of EP document + #window /= numpy.sqrt(window_sigma_sq) + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + calculate_spectral_correlation diff --git a/docs/_sources/epower_step4_filterbank.rst.txt b/docs/_sources/epower_step4_filterbank.rst.txt new file mode 100644 index 0000000..57855a6 --- /dev/null +++ b/docs/_sources/epower_step4_filterbank.rst.txt @@ -0,0 +1,45 @@ +.. _filterbank: + +Excess Power - Step 4: Computing the filter bank +================================================ + +The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in increments equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM). 
:: + + # Define filters + filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) + +This function returns 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``). The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as follows: :: + + # Print data of first channel's filter + print filter_bank[0].data.data + # Print frequency separation between 2 values in the first channel's filter + print filter_bank[0].deltaF + # Print first frequency of the first channel's filter + print filter_bank[0].f0 + # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) + print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF + +Further in the analysis, the following filters will be used: +1. ``white_filter_ip``: Whitened filter inner products computed with themselves. +2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. +3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. +4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. + +:: + + # This is necessary to compute the mu^2 normalizations + white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) + unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) + # These two are needed for the unwhitened mean square sum (hrss) + white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) + unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. 
autosummary:: + :toctree: generated/ + + create_filter_bank diff --git a/docs/_sources/epower_step5_normalization.rst.txt b/docs/_sources/epower_step5_normalization.rst.txt new file mode 100644 index 0000000..66b0f3e --- /dev/null +++ b/docs/_sources/epower_step5_normalization.rst.txt @@ -0,0 +1,23 @@ +Excess Power - Step 5: Normalization of virtual channel +======================================================= + +The virtual channels will be used during the Excess Power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function. :: + + # Initialise dictionary + mu_sq_dict = {} + # nc_sum additional channel adds + for nc_sum in range(0, int(math.log(nchans, 2))): + min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 + print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) + nc_sum = 2**nc_sum - 1 + mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + compute_channel_renormalization diff --git a/docs/_sources/epower_step6_initialisation.rst.txt b/docs/_sources/epower_step6_initialisation.rst.txt new file mode 100644 index 0000000..02e17f4 --- /dev/null +++ b/docs/_sources/epower_step6_initialisation.rst.txt @@ -0,0 +1,41 @@ +Initialise event list and determine stride boundaries +===================================================== + +First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as ``lsctables``. A pre-defined LSC table can be constructed using the ``New`` function from the `glue.ligolw.lsctables `_ module. 
We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record. :: + + # Create event list for single burst table + event_list = lsctables.New(lsctables.SnglBurstTable, + ['start_time','start_time_ns','peak_time','peak_time_ns', + 'duration','bandwidth','central_freq','chisq_dof', + 'confidence','snr','amplitude','channel','ifo', + 'process_id','event_id','search','stop_time','stop_time_ns']) + +We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in sample unit for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in sample unit is determined and added to both starting and ending time indexes. :: + + # Determine boundaries of stride in time domain + t_idx_min, t_idx_max = 0, seg_len + # Check if user requested starting time is defined + if args.analysis_start_time is not None: + # Define the time difference in seconds between data and user requested starting times + t_idx_off = args.analysis_start_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_off = int(t_idx_off * args.sample_rate) + else: + # Define index of the starting point as first value in data + t_idx_off = 0 + # Initialise minimum index values as offset starting index + t_idx_min += t_idx_off + # Initialise maximum index values as offset starting index + t_idx_max += t_idx_off + +Finally, the index for the ending time after all the segments have been analysed can be estimated from the user-defined parameter or is defined as the length of the time series data ``ts_data``. 
:: + + # Check if user requested end time is defined + if args.analysis_end_time is not None: + # Define the time difference between data and user requested ending times + t_idx_max_off = args.analysis_end_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_max_off = int(t_idx_max_off * args.sample_rate) + else: + # Define index of the ending point as the length of data array + t_idx_max_off = len(ts_data) diff --git a/docs/_sources/example.rst.txt b/docs/_sources/example.rst.txt new file mode 100644 index 0000000..6677764 --- /dev/null +++ b/docs/_sources/example.rst.txt @@ -0,0 +1,41 @@ +Working Example +=============== + +Either on your own computer or on the server, on a Jupyter notebook or on a Python script, the first thing to do is to import the ``gdas`` package that contain all the modules present in the GNOME software. That can be done easily by doing the following:: + + import gdas + +In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:: + + station = 'fribourg01' + start_time = '2016-11-03-04' + end_time = '2016-11-03-04-2' + +where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified. + +If you are not working on the server and the data are located in a different repository than ``/GNOMEDrive/gnome/serverdata/``, a custom path can be defined. 
For instance:: + + datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/' + +The magnetic field data can then be retrieve as follows:: + + ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath) + +The ``gdas.magfield`` method will return 3 arrays of data that can then be used to produce different plots:: + + gdas.plot_activity(activity) + gdas.plot_time_series(station,ts_list,seglist=activity) + gdas.plot_asd(station,ts_list) + gdas.plot_whitening(station,ts_list,activity) + +This is a script to do Excess Power analysis:: + + psd_segment_length = 60 + psd_segment_stride = 30 + psd_estimation = 'median-mean' + window_fraction = 0 + tile_fap = 1e-5 + channels = 250 + + gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels) + gdas.plot_triggers() diff --git a/docs/_sources/excess_power.rst.txt b/docs/_sources/excess_power.rst.txt new file mode 100644 index 0000000..f00a33d --- /dev/null +++ b/docs/_sources/excess_power.rst.txt @@ -0,0 +1,923 @@ +Excess-Power algorithm +====================== + +General overview +---------------- + +The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search of Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (200) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. (2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. + +We present below a step-by-step procedure followed during the Excess Power search analysis. 
For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. + +- :ref:`Time domain segmentation and PSD estimate ` + + We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. + +- :ref:`Comb of frequency channels ` + + We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. + +- :ref:`Creating analysing blocks ` + + The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel. 
+ +- :ref:`Creating tiles with different bandwidth ` + + We can now construct tiles with different bandwidth by summing multiple channels together. + +- :ref:`Exploring tiles with different duration ` + + For each given tile bandwidth, one can investigate different tile durations. This can be done by exploring different numbers of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing :math:`d`, one can explore multiple tile durations for different bandwidths. + +- :ref:`Define triggering signal ` + + The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. + +.. figure:: ./img/overview.png + + Overview of the Excess Power method and difference between segments, channels, tiles and blocks. + +.. _psdestimate: + +Estimate Power Spectral Density (PSD) +------------------------------------- + +The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. 
The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating-point values. :: + + # Convert time series as array of float + data = ts_data.astype(numpy.float64) + +The PSD is calculated by splitting up the signal into overlapping segments and scanning through each segment to calculate an individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: + + # Average method to measure PSD from the data + avg_method = args.psd_estimation + +One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in units of seconds with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into units of samples. :: + + # The segment length for PSD estimation in samples + seg_len = int(args.psd_segment_length * args.sample_rate) + # The separation between consecutive segments in samples + seg_stride = int(args.psd_segment_stride * args.sample_rate) + +We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, and the function returns the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin. 
:: + + # Lifted from the psd.from_cli module + fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) + # Plot the power spectral density + plot_spectrum(fd_psd) + # We need this for the SWIG functions + lal_psd = fd_psd.lal() + +One can display the power measurements, frequency array and frequency separation between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: + + print 'Display power measurements of the first 10 frequency bins' + print fd_psd[:10] + print 'Display central frequency of the first 10 bins' + print fd_psd.sample_frequencies[:10] + print 'Display the frequency separation between bins' + print fd_psd.delta_f + +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate. + +Checking filtering settings +--------------------------- + +The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: + + if args.min_frequency < args.strain_high_pass: + print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) + +In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense because signals at higher frequencies cannot be resolved at this sampling rate. 
:: + + if args.max_frequency is None: + args.max_frequency = args.sample_rate / 2.0 + +If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args `) or the number of frequency channels (``--channels`` argument) is not defined but the total spectral band is (``data_band``), one can then determine all the filter settings as follows: :: + + + if args.tile_bandwidth is None and args.channels is None: + # Exit program with error message + exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") + else: + # Define as assert statement that tile maximum frequency larger than its minimum frequency + assert args.max_frequency >= args.min_frequency + # Define spectral band of data + data_band = args.max_frequency - args.min_frequency + # Check if tile bandwidth or channel is defined + if args.tile_bandwidth is not None: + # Define number of possible filter bands + nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 + elif args.channels is not None: + # Define filter bandwidth + band = args.tile_bandwidth = data_band / (args.channels + 1) + assert args.channels > 1 + +The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: + + # Lowest frequency of the first filter + flow = args.min_frequency + +Whitening window and spectral correlation +----------------------------------------- + +This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8; smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end). 
:: + + window_fraction = 0 + +The two point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: + + # Do two point spectral correlation + window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) + window = window.data.data + window_sigma_sq = numpy.mean(window**2) + # Pre scale the window by its root mean squared -- see eqn 11 of EP document + #window /= numpy.sqrt(window_sigma_sq) + +.. _filterbank: + +Computing the filter bank +------------------------- + +The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in an increment equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM). :: + + # Define filters + filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) + +This function will return 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``). 
The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as follows: :: + + # Print data of first channel's filter + print filter_bank[0].data.data + # Print frequency separation between 2 values in the first channel's filter + print filter_bank[0].deltaF + # Print first frequency of the first channel's filter + print filter_bank[0].f0 + # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) + print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF + +Further in the analysis, the following filters will be used: +1. ``white_filter_ip``: Whitened filter inner products computed with themselves. +2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. +3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. +4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. + +:: + + # This is necessary to compute the mu^2 normalizations + white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) + unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) + # These two are needed for the unwhitened mean square sum (hrss) + white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) + unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) + +Normalization of virtual channel +-------------------------------- + +The virtual channels will be used during the Excess Power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function. 
:: + + # Initialise dictionary + mu_sq_dict = {} + # nc_sum additional channel adds + for nc_sum in range(0, int(math.log(nchans, 2))): + min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 + print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) + nc_sum = 2**nc_sum - 1 + mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) + +Initialise event list and determine stride boundaries +----------------------------------------------------- + +First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as ``lsctables``. A pre-defined LSC table can be constructed using the ``New`` function from the `glue.ligolw.lsctables `_ module. We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record. :: + + # Create event list for single burst table + event_list = lsctables.New(lsctables.SnglBurstTable, + ['start_time','start_time_ns','peak_time','peak_time_ns', + 'duration','bandwidth','central_freq','chisq_dof', + 'confidence','snr','amplitude','channel','ifo', + 'process_id','event_id','search','stop_time','stop_time_ns']) + +We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in units of samples for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in units of samples is determined and added to both starting and ending time indexes. 
:: + + # Determine boundaries of stride in time domain + t_idx_min, t_idx_max = 0, seg_len + # Check if user requested starting time is defined + if args.analysis_start_time is not None: + # Define the time difference in seconds between data and user requested starting times + t_idx_off = args.analysis_start_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_off = int(t_idx_off * args.sample_rate) + else: + # Define index of the starting point as first value in data + t_idx_off = 0 + # Initialise minimum index values as offset starting index + t_idx_min += t_idx_off + # Initialise maximum index values as offset starting index + t_idx_max += t_idx_off + +Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ``ts_data``. :: + + # Check if user requested end time is defined + if args.analysis_end_time is not None: + # Define the time difference between data and user requested ending times + t_idx_max_off = args.analysis_end_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_max_off = int(t_idx_max_off * args.sample_rate) + else: + # Define index of the ending point as the length of data array + t_idx_max_off = len(ts_data) + +.. _analysingblocks: + +Define analysing blocks +----------------------- + +The first thing we do is to calculate the time series for the segment that is covered (``tmp_ts_data``) and redefined the metadata, especially the time of the first sample in seconds which is defined by the ``epoch`` argument and is different for every segment. After plotting the time series for that segment, the data are then converted into frequency series (``fs_data``) using the `to_frequencyseries `_ module from the ``pycbc.types.timeseries.TimeSeries`` library. Finally, the frequency data are then whitened. 
:: + + # Loop over each data within the user requested time period + while t_idx_max <= t_idx_max_off: + # Define starting and ending time of the segment in seconds + start_time = ts_data.start_time + t_idx_min/float(args.sample_rate) + end_time = ts_data.start_time + t_idx_max/float(args.sample_rate) + print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off)) + # Model a withen time series for the block + tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time) + # Save time series in segment repository + segfolder = 'segments/%i-%i'%(start_time,end_time) + os.system('mkdir -p '+segfolder) + plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder)) + # Convert times series to frequency series + fs_data = tmp_ts_data.to_frequencyseries() + print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2 + # Whitening (FIXME: Whiten the filters, not the data) + fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f) + print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2 + +Create time-frequency map for each block +---------------------------------------- + +We initialise a 2D zero array for a time-frequency map (``tf_map``) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the ``nchans`` variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the ``seg_len`` variable. 
:: + + # Initialise 2D zero array for time-frequency map + tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128) + +We also initialise a zero vector for a temporary filter bank (``tmp_filter_bank``) that will store, for a given channel, the filter's values from the original filter bank (``filter_bank``) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (``fd_psd``). :: + + # Initialise 1D zero array + tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128) + +We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel's filter from the original filter bank. :: + + # Loop over all the channels + print tprint(t0,t1),"Filtering all %d channels..." % nchans + for i in range(nchans): + # Reset filter bank series + tmp_filter_bank *= 0.0 + # Index of starting frequency + f1 = int(filter_bank[i].f0/fd_psd.delta_f) + # Index of ending frequency + f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1 + # (FIXME: Why is there a factor of 2 here?) + tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2 + +We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel. :: + + # Define the template to filter the frequency series with + template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False) + +Finally, we use the `matched_filter_core `_ module from the ``pycbc.filter.matchedfilter`` library to filter the frequency series from the channel. 
This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector. :: + + # Create filtered series + filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None, + low_frequency_cutoff=filter_bank[i].f0, + high_frequency_cutoff=filter_bank[i].f0+2*band) + +The `matched filter `_ is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed. :: + + # Include filtered series in the map + tf_map[i,:] = filtered_series[0].numpy() + +The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of samples present in one segment of data, i.e. the segment's length in seconds times the sampling rate. The map can finally be plotted with a :math:`\Delta t` corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and a :math:`\Delta f` equal to the bandwidth of one channel. :: + + plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder)) + +.. _tilebandwidth: + +Constructing tiles of different bandwidth +----------------------------------------- + +First and foremost, we define a clipping region in the data to be used to remove window corruption; this is non-zero if the ``window_fraction`` variable is set to a non-zero value. :: + + print tprint(t0,t1),"Beginning tile construction..." + # Clip the boundaries to remove window corruption + clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2) + +In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. 
We first need to set up a loop such that the maximum number of additional channels is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (``nc_sum``) would therefore be equal to 2 to the power of the current quantity of additional channels. :: + + for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds + nc_sum = 2**nc_sum - 1 + print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1) + +The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to ``band * (nc_sum + 1)``. :: + + us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t))) + print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate) + +"Virtual" wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time frequency map for this tile using the :ref:`make_indp_tiles ` internal function. In other words, we are constructing multiple sub-tiles for which we can determine the respective energy in the given frequency band. :: + + mu_sq = mu_sq_dict[nc_sum] + sys.stderr.write("\t...calculating tiles...") + if clip_samples > 0: + tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq) + else: + tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq) + sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape) + print >>sys.stderr, " done" + print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles)) + +.. _tileduration: + +Explore multiple tile durations +------------------------------- + +Now that we have created a tile with a specific bandwidth, we can start exploring different durations for the tile. 
We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the ``--max-duration`` argument. If not, the value will be set to 32. :: + + if args.max_duration is not None: + max_dof = 2 * args.max_duration * (band * (nc_sum+1)) + else: + max_dof = 32 + assert max_dof >= 2 + +Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth. :: + + print "\t\t...getting longer durations..." + #for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]: + for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]: + sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j)) + #tlen = tiles.shape[1] - j + 1 + tlen = tiles.shape[1] - 2*j + 1 + 1 + if tlen <= 0: + print >>sys.stderr, " ...not enough samples." + continue + dof_tiles = numpy.zeros((tiles.shape[0], tlen)) + #:sum_filter = numpy.ones(j) + # FIXME: This is the correct filter for 50% overlap + sum_filter = numpy.array([1,0] * (j-1) + [1]) + #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1]) + for f in range(tiles.shape[0]): + # Sum and drop correlate tiles + # FIXME: don't drop correlated tiles + #output = numpy.convolve(tiles[f,:], sum_filter, 'valid') + dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid') + print >>sys.stderr, " done" + print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles)) + level_tdiff = time.time() - tdiff + print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff + +Finally, the bandwidth and duration of the tile can be defined as followed: :: + + # Current bandwidth of the time-frequency map tiles + current_band = band * (nc_sum + 1) + # How much each "step" is in the frequency domain -- almost + # assuredly the fundamental bandwidth + df = current_band + # How much each "step" is in the time domain -- under sampling rate + # FIXME: THis won't work if the sample rate isn't a power of 2 + dt = 1.0 / 2 / (2 * 
current_band) * 2 + full_band = 250 + dt = current_band / full_band * ts_data.sample_rate + dt = 1.0/dt + # Duration is fixed by the NDOF and bandwidth + duration = j / 2.0 / current_band + +.. _triggerfinding: + +Trigger finding +--------------- + +In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such threshold can be determined by using the /inverse survival function/ method from the `scipy.stats.chi2 `_ package. :: + + threshold = scipy.stats.chi2.isf(args.tile_fap, j) + print "Threshold for this level: %f" % threshold + #if numpy.any(dof_tiles > threshold): + #plot_spectrogram(dof_tiles.T) + #import pdb; pdb.set_trace() + +Once the threshold is set, one can then run the :ref:`trigger_list_from_map ` function to quickly find the trigger signal from the ``dof_tiles`` array that :: + + # Since we clip the data, the start time needs to be adjusted accordingly + window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2 + trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None) + for event in event_list[::-1]: + if event.amplitude != None: + continue + etime_min_idx = float(event.get_start()) - float(fs_data.epoch) + etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t) + etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration + etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t) + # (band / 2) to account for sin^2 wings from finest filters + flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band) + fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band) + # TODO: Check that the undersampling rate is always commensurate + # with the indexing: that is to say that + # mod(etime_min_idx, us_rate) == 0 always + z_j_b = 
tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate] + # FIXME: Deal with negative hrss^2 -- e.g. remove the event + try: + event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof) + except ValueError: + event.amplitude = 0 + + print "Total number of events: %d" % len(event_list) + +Switch to new block +------------------- + +The following will move the frequency band to the next segment: :: + + tdiff = time.time() - tdiff + print "Done with this block: total %f" % tdiff + + t_idx_min += int(seg_len * (1 - window_fraction)) + t_idx_max += int(seg_len * (1 - window_fraction)) + +Extracting GPS time range +------------------------- + +We use the `LIGOTimeGPS `_ structure from the =glue.lal= package to /store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference/. Once both times are defined, the range of value is stored in a semi-open interval using the `segment `_ module from the =glue.segments= package. 
:: + + # Starting epoch relative to GPS starting epoch + start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time) + # Ending epoch relative to GPS ending epoch + end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time) + # Represent the range of values in the semi-open interval + inseg = segment(start_time,end_time) + +Prepare output file for given time range +---------------------------------------- + +:: + + xmldoc = ligolw.Document() + xmldoc.appendChild(ligolw.LIGO_LW()) + + ifo = args.channel_name.split(":")[0] + proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date) + + # Figure out the data we actually analyzed + outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction) + + ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg) + + for sb in event_list: + sb.process_id = proc_row.process_id + sb.search = proc_row.program + #sb.ifo, sb.channel = args.channel_name.split(":") + sb.ifo, sb.channel = station, setname + + xmldoc.childNodes[0].appendChild(event_list) + fname = make_filename(station, inseg) + + utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True) + +Plot trigger results +-------------------- + +:: + + events = SnglBurstTable.read(fname+'.gz') + #del events[10000:] + plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr") + #plot = events.plot('time', 'central_freq', color='snr') + #plot.set_yscale("log") + plot.set_ylim(1e-0, 250) + t0 = 1153742417 + plot.set_xlim(t0 + 0*60, t0 + 1*60) + #plot.set_xlim(t0 + 28, t0 + 32) + pyplot.axvline(t0 + 30, color='r') + cb = plot.add_colorbar(cmap='viridis') + plot.savefig("triggers.png") + +Module Access +============= + +Extract Magnetic Field Data +--------------------------- + +Extract magnetic field data from HDF5 files. + +.. 
currentmodule:: gdas.retrieve + +.. autosummary:: + :toctree: generated/ + + magfield + file_to_segment + construct_utc_from_metadata + generate_timeseries + create_activity_list + retrieve_data_timeseries + retrieve_channel_data + +Plotting routines +----------------- + +Methods to produce time-frequency plots and others + +.. currentmodule:: gdas.plots + +.. autosummary:: + :toctree: generated/ + + plot_activity + plot_time_series + plot_asd + plot_whitening + plot_ts + plot_spectrum + plot_spectrogram + plot_spectrogram_from_ts + plot_triggers + +Utilities +--------- + +Independent routines to do various other things + +.. currentmodule:: gdas.utils + +.. autosummary:: + :toctree: generated/ + + create_sound + + +.. _file_to_segment: + +.. ** Extract segment information +.. +.. The starting and ending UTC times for a specific HDF5 file are determined by using the =Date=, =t0= and =t1= attributes from the metadata. The [[construct_utc_from_metadata][=construct_utc_from_metadata=]] function is then used to calculate the UTC time. Finally, the [[http://software.ligo.org/docs/glue/glue.__segments.segment-class.html][=segment=]] module from the =glue.segments= library is used to represent the range of times in a semi-open interval. +.. +.. #+BEGIN_SRC python +.. def file_to_segment(hfile,segname): +.. # Extract all atributes from the data +.. attrs = hfile[segname].attrs +.. # Define each attribute +.. dstr, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"] +.. # Construct GPS starting time from data +.. start_utc = construct_utc_from_metadata(dstr, t0) +.. # Construct GPS starting time from data +.. end_utc = construct_utc_from_metadata(dstr, t1) +.. # Represent the range of times in the semi-open interval +.. return segment(start_utc,end_utc) +.. #+END_SRC +.. +.. ** Constructing UTC from metadata +.. <> +.. +.. #+BEGIN_SRC python +.. def construct_utc_from_metadata(datestr, t0str): +.. instr = "%d-%d-%02dT" % tuple(map(int, datestr.split('/'))) +.. 
instr += t0str +.. t = Time(instr, format='isot', scale='utc') +.. return t.gps +.. #+END_SRC +.. +.. ** Generate time series +.. <> +.. +.. #+BEGIN_SRC python +.. def generate_timeseries(data_list, setname="MagneticFields"): +.. full_data = TimeSeriesList() +.. for seg in sorted(data_list): +.. hfile = h5py.File(data_list[seg], "r") +.. full_data.append(retrieve_data_timeseries(hfile, "MagneticFields")) +.. hfile.close() +.. return full_data +.. #+END_SRC +.. +.. ** Retrieve data time series +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_data_timeseries(hfile, setname): +.. dset = hfile[setname] +.. sample_rate = dset.attrs["SamplingRate(Hz)"] +.. gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"]) +.. data = retrieve_channel_data(hfile, setname) +.. ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch) +.. return ts_data +.. #+END_SRC +.. +.. ** Retrieve channel data +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_channel_data(hfile, setname): +.. return hfile[setname][:] +.. #+END_SRC +.. +.. .. _calculate_spectral_correlation: +.. +.. ** Two point spectral correlation +.. +.. For our data, we apply a Tukey window whose flat bit corresponds to =window_fraction= (in percentage) of the segment length (in samples) used for PSD estimation (i.e. =fft_window_len=). This can be done by using the [[http://software.ligo.org/docs/lalsuite/lal/_window_8c_source.html#l00597][=CreateTukeyREAL8Window=]] module from the =lal= library. +.. +.. #+BEGIN_SRC python +.. def calculate_spectral_correlation(fft_window_len, wtype='hann', window_fraction=None): +.. if wtype == 'hann': +.. window = lal.CreateHannREAL8Window(fft_window_len) +.. elif wtype == 'tukey': +.. window = lal.CreateTukeyREAL8Window(fft_window_len, window_fraction) +.. else: +.. raise ValueError("Can't handle window type %s" % wtype) +.. #+END_SRC +.. +.. 
Once the window is built, a new frequency plan is created which will help perform a [[http://fourier.eng.hmc.edu/e101/lectures/fourier_transform_d/node1.html][forward transform]] on the data. This is done with the [[http://software.ligo.org/docs/lalsuite/lal/group___real_f_f_t__h.html#gac4413752db2d19cbe48742e922670af4][=CreateForwardREAL8FFTPlan=]] module which takes as arguments the total number of points in the real data and the measurement level for plan creation (here 1 stands for measuring the best plan). +.. +.. #+BEGIN_SRC python +.. fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1) +.. #+END_SRC +.. +.. We can finally compute and return the two-point spectral correlation function for the whitened frequency series (=fft_plan=) from the window applied to the original time series using the [[http://software.ligo.org/docs/lalsuite/lal/group___time_freq_f_f_t__h.html#ga2bd5c4258eff57cc80103d2ed489e076][=REAL8WindowTwoPointSpectralCorrelation=]] module. +.. +.. #+BEGIN_SRC python +.. return window, lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan) +.. #+END_SRC +.. +.. ** Create filter bank +.. <> +.. +.. The construction of a filter bank is fairly simple. For each channel, a frequency domain channel filter function will be created using the [[http://software.ligo.org/docs/lalsuite/lalburst/group___e_p_search__h.html#ga899990cbd45111ba907772650c265ec9][=CreateExcessPowerFilter=]] module from the =lalburst= package. Each channel filter is divided by the square root of the PSD frequency series prior to normalization, which has the effect of de-emphasizing frequency bins with high noise content, and is called "over whitening". The data and metadata are finally stored in the =filter_fseries= and =filter_bank= arrays respectively. Finally, we store in a final array, called =np_filters=, all the time series generated from each filter so that we can plot them afterwards. +.. +.. #+BEGIN_SRC python +.. 
def create_filter_bank(delta_f, flow, band, nchan, psd, spec_corr): +.. lal_psd = psd.lal() +.. lal_filters, np_filters = [],[] +.. for i in range(nchan): +.. lal_filter = lalburst.CreateExcessPowerFilter(flow + i*band, band, lal_psd, spec_corr) +.. np_filters.append(Spectrum.from_lal(lal_filter)) +.. lal_filters.append(lal_filter) +.. return filter_fseries, lal_filters, np_filters +.. #+END_SRC +.. +.. ** Compute filter inner products with themselves +.. <> +.. #+BEGIN_SRC python +.. def compute_filter_ips_self(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of inner products of input filters with themselves. If psd +.. argument is given, the unwhitened filter inner products will be returned. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, psd) for f in lal_filters]) +.. #+END_SRC +.. +.. ** Compute filter inner products with adjacent filters +.. <> +.. +.. #+BEGIN_SRC python +.. def compute_filter_ips_adjacent(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of filter inner products between input adjacent filters. +.. If psd argument is given, the unwhitened filter inner products will be +.. returned. The returned array index is the inner product between the +.. lal_filter of the same index, and its (array) adjacent filter --- assumed +.. to be the frequency adjacent filter. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, psd) for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])]) +.. #+END_SRC +.. +.. .. _compute_channel_renomalization: +.. +.. Compute channel renormalization +.. ------------------------------- +.. +.. Compute the renormalization for the base filters up to a given bandwidth. +.. +.. #+BEGIN_SRC python +.. def compute_channel_renomalization(nc_sum, lal_filters, spec_corr, nchans, verbose=True): +.. mu_sq = (nc_sum+1)*numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None) for f in lal_filters]) +.. 
# Uncomment to get all possible frequency renormalizations +.. #for n in xrange(nc_sum, nchans): # channel position index +.. for n in xrange(nc_sum, nchans, nc_sum+1): # channel position index +.. for k in xrange(0, nc_sum): # channel sum index +.. # FIXME: We've precomputed this, so use it instead +.. mu_sq[n] += 2*lalburst.ExcessPowerFilterInnerProduct(lal_filters[n-k], lal_filters[n-1-k], spec_corr, None) +.. #print mu_sq[nc_sum::nc_sum+1] +.. return mu_sq +.. #+END_SRC +.. +.. ** Measure root-sum-square strain (hrss) +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, w_ss_ij, delta_f, delta_t, filter_len, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. +.. z_j_b - time frequency map block which the constructed tile covers +.. uw_ss_ii - unwhitened filter inner products +.. uw_ss_ij - unwhitened adjacent filter inner products +.. w_ss_ij - whitened adjacent filter inner products +.. delta_f - frequency binning of EP filters +.. delta_t - native time resolution of the time frequency map +.. filter_len - number of samples in a fitler +.. dof - degrees of freedom in the tile (twice the time-frequency area) +.. """ +.. s_j_b_avg = uw_ss_ii * delta_f / 2 +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= delta_f +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \ +.. numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. # eqn. 63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / filter_len * 2) +.. # eqn. 
64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt(numpy.sum(numpy.absolute(s_j_nb)**2) * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Unwhitened inner products filtering +.. <> +.. +.. #+BEGIN_SRC python +.. # < s^2_j(f_1, b) > = 1 / 2 / N * \delta_t EPIP{\Theta, \Theta; P} +.. def uw_sum_sq(filter1, filter2, spec_corr, psd): +.. return lalburst.ExcessPowerFilterInnerProduct(filter1, filter2, spec_corr, psd) +.. #+END_SRC +.. +.. ** Unwhitened sum of squares signal +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_slowly(z_j_b, lal_filters, spec_corr, psd, delta_t, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. NOTE: This function is deprecated +.. in favor of measure_hrss, since it requires recomputation of many inner products, +.. making it particularly slow. +.. """ +.. # FIXME: Make sure you sum in time correctly +.. # Number of finest bands in given tile +.. nb = len(z_j_b) +.. # eqn. 56 -- unwhitened mean square of filter with itself +.. uw_ss_ii = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i], spec_corr, psd) for i in range(nb)]) +.. s_j_b_avg = uw_ss_ii * lal_filters[0].deltaF / 2 +.. # eqn. 57 -- unwhitened mean square of filter with adjacent filter +.. uw_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, psd) for i in range(nb-1)]) +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= lal_filters[0].deltaF +.. +.. # eqn. 61 +.. w_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, None) for i in range(nb-1)]) +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / len(lal_filters[0].data.data) * \ +.. (numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij).sum() +.. +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. +.. # eqn. 
63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / len(lal_filters[0].data.data) * 2) +.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt((numpy.absolute(s_j_nb)**2).sum() * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Measure root-mean square strain poorly +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_poorly(tile_energy, sub_psd): +.. return math.sqrt(tile_energy / numpy.average(1.0 / sub_psd) / 2) +.. #+END_SRC +.. +.. ** List triggers from map +.. <> +.. +.. #+BEGIN_SRC python +.. def trigger_list_from_map(tfmap, event_list, threshold, start_time, start_freq, duration, band, df, dt, psd=None): +.. +.. # FIXME: If we don't convert this the calculation takes forever --- but we should convert it once and handle deltaF better later +.. if psd is not None: +.. npy_psd = psd.numpy() +.. +.. start_time = LIGOTimeGPS(float(start_time)) +.. ndof = 2 * duration * band +.. +.. spanf, spant = tfmap.shape[0] * df, tfmap.shape[1] * dt +.. print "Processing %.2fx%.2f time-frequency map." % (spant, spanf) +.. +.. for i, j in zip(*numpy.where(tfmap > threshold)): +.. event = event_list.RowType() +.. +.. # The points are summed forward in time and thus a `summed point' is the +.. # sum of the previous N points. If this point is above threshold, it +.. # corresponds to a tile which spans the previous N points. However, th +.. # 0th point (due to the convolution specifier 'valid') is actually +.. # already a duration from the start time. All of this means, the + +.. # duration and the - duration cancels, and the tile 'start' is, by +.. # definition, the start of the time frequency map if j = 0 +.. 
# FIXME: I think this needs a + dt/2 to center the tile properly +.. event.set_start(start_time + float(j * dt)) +.. event.set_stop(start_time + float(j * dt) + duration) +.. event.set_peak(event.get_start() + duration / 2) +.. event.central_freq = start_freq + i * df + 0.5 * band +.. +.. event.duration = duration +.. event.bandwidth = band +.. event.chisq_dof = ndof +.. +.. event.snr = math.sqrt(tfmap[i,j] / event.chisq_dof - 1) +.. # FIXME: Magic number 0.62 should be determine empircally +.. event.confidence = -lal.LogChisqCCDF(event.snr * 0.62, event.chisq_dof * 0.62) +.. if psd is not None: +.. # NOTE: I think the pycbc PSDs always start at 0 Hz --- check +.. psd_idx_min = int((event.central_freq - event.bandwidth / 2) / psd.delta_f) +.. psd_idx_max = int((event.central_freq + event.bandwidth / 2) / psd.delta_f) +.. +.. # FIXME: heuristically this works better with E - D -- it's all +.. # going away with the better h_rss calculation soon anyway +.. event.amplitude = measure_hrss_poorly(tfmap[i,j] - event.chisq_dof, npy_psd[psd_idx_min:psd_idx_max]) +.. else: +.. event.amplitude = None +.. +.. event.process_id = None +.. event.event_id = event_list.get_next_id() +.. event_list.append(event) +.. #+END_SRC +.. +.. ** Determine output segment +.. <> +.. +.. #+BEGIN_SRC python +.. def determine_output_segment(inseg, dt_stride, sample_rate, window_fraction=0.0): +.. """ +.. Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window. +.. +.. If window_fration is set to 0 (default), assume no windowing. +.. """ +.. # Amount to overlap successive blocks so as not to lose data +.. window_overlap_samples = window_fraction * sample_rate +.. outseg = inseg.contract(window_fraction * dt_stride / 2) +.. +.. # With a given dt_stride, we cannot process the remainder of this data +.. 
remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction)) +.. # ...so make an accounting of it +.. outseg = segment(outseg[0], outseg[1] - remainder) +.. return outseg +.. #+END_SRC +.. +.. ** Make tiles +.. <> +.. +.. #+BEGIN_SRC python +.. def make_tiles(tf_map, nc_sum, mu_sq): +.. tiles = numpy.zeros(tf_map.shape) +.. sum_filter = numpy.ones(nc_sum+1) +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. for t in xrange(tf_map.shape[1]): +.. # Sum and drop correlate tiles +.. # FIXME: don't drop correlated tiles +.. output = numpy.convolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. #output = fftconvolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. tiles[:len(output),t] = numpy.absolute(output) / math.sqrt(2) +.. return tiles[:len(output)]**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create a time frequency map +.. <> +.. +.. In this function, we create a time-frequency map with a resolution similar to that of =tf_map= but rescaled by a factor of =nc_sum= + 1. All tiles will be independent up to overlap from the original tiling. The =mu_sq= factor is applied to the resulting sum to normalize the outputs to be zero-mean unit-variance Gaussian variables (if the input is Gaussian). +.. +.. #+BEGIN_SRC python +.. def make_indp_tiles(tf_map, nc_sum, mu_sq): +.. tiles = tf_map.copy() +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. stride = nc_sum + 1 +.. for i in xrange(tiles.shape[0]/stride): +.. numpy.absolute(tiles[stride*i:stride*(i+1)].sum(axis=0), tiles[stride*(i+1)-1]) +.. return tiles[nc_sum::nc_sum+1].real**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create output filename +.. <> +.. +.. #+BEGIN_SRC python +.. def make_filename(ifo, seg, tag="excesspower", ext="xml.gz"): +.. if isinstance(ifo, str): +.. ifostr = ifo +.. 
else: +.. ifostr = "".join(ifo) +.. st_rnd, end_rnd = int(math.floor(seg[0])), int(math.ceil(seg[1])) +.. dur = end_rnd - st_rnd +.. return "%s-%s-%d-%d.%s" % (ifostr, tag, st_rnd, dur, ext) +.. #+END_SRC + diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index c99a194..2225576 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -1,13 +1,5 @@ -GNOME Data Analysis Software -============================ - -.. toctree:: - :maxdepth: 2 - - index.rst - -Introduction -============ +Welcome! +======== This package contains functions useful for magnetic field signal processing, with a focus on Excess Power search analysis and application on the data for the GNOME collaboration, see `Pustelny et al. (2013) `_. This documentation details all the available functions and tasks available through this software. Here are some example tasks that can (or will soon to) be handled: @@ -18,1379 +10,33 @@ This package contains functions useful for magnetic field signal processing, wit * Cross-correlation of continuous sine wave signals. * Perform Allan Standard deviation. +Should you have any questions or suggested corrections to be made, do not hesitate to `contact me `_. + .. raw:: html Fork me on GitHub -Installation -============ - -The program requires the following general packages to run: `Numpy `_, `Matplotlib `_, `Scipy `_ and `Astropy `_. The following LIGO-related packages are also required for full functionality: `Gwpy `_, `PyCBC `_, `Glue `_, `LAL `_, `LALburst `_ and `LALsimulation `_. - -While most of the packages can be installed automatically using `pip `_, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called `LALsuite `_ which can be installed fairly easily on Debian (Linux) and Mac OS machines. 
- -LALsuite tools --------------- - -Some useful pages on how to download and install the LIGO software can be found `here `_. - -MacPorts (Mac) -~~~~~~~~~~~~~~ - -For Mac users, the installation is pretty easy, detailed information can be found on `this page `_. You need to have `MacPorts `_ installed. The following commands should suffice to install the LALsuite package on your machine:: - - sudo port install lscsoft-deps - sudo port install glue - sudo port install lalapps - -The first command will install all the dependencies needed for the LIGO software to be installed. The following 2 commands will install the actual packages. - -apt-get (Debian) -~~~~~~~~~~~~~~~~ - -Since the LIGO software is not a default package in the apt package manager system on Debian machine, additional steps will be needed. The first step is to add the following links to the source list located at ``/etc/apt/sources.list``:: - - deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib - deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib - -Note that the ``[arch=amd64]`` is needed to fix the architecture problem in case it tries to install i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:: - - apt-get install build-essential automake autoconf libtool devscripts - -The LIGO software can finally be installed using the following command:: - - apt-get install lscsoft-all - -Main Program ------------- - -The best way to install the GNOME software along with the rest of the dependencies is by using `pip`:: - - pip install gdas - -(You may need to put a ``sudo`` in front of this). For this to work -you need to have `pip -`_ installed. This -method allows for easy uninstallation. 
- -You can also simply download the tarball from the PyPI website, unpack it and then do:: - - python setup.py install - -The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. -The development version can be downloaded from `here `_. - -Multi-user Server -================= - -A GNOME JupyterHub, or multi-user server has been created to allow each member to access the entire available dataset. Member who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. Member who are not part of the GNOME collaboration will not be granted access to the dataset but are free to use our software on their own data. - -The server can be accessed in two ways, either by acceding the `server's webpage `_, or from your terminal through SSH:: - - ssh -X username@budker.uni-mainz.de -p 8022 - -While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after login and select Terminal: - -.. figure:: img/jupyter1.png - :width: 70% - :align: center - -You can then use the terminal window to access files and create new Python scripts for your analysis. - -.. figure:: img/jupyter2.png - :width: 70% - :align: center - -Working Example -=============== - -Either on your own computer or on the server, on a Jupyter notebook or on a Python script, the first thing to do is to import the ``gdas`` package that contain all the modules present in the GNOME software. 
That can be done easily by doing the following:: - - import gdas - -In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:: - - station = 'fribourg01' - start_time = '2016-11-03-04' - end_time = '2016-11-03-04-2' - -where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified. - -If you are not working on the server and the data are located in a different repository than ``/GNOMEDrive/gnome/serverdata/``, a custom path can be defined. For instance:: - - datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/' - -The magnetic field data can then be retrieve as follows:: - - ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath) - -The ``gdas.magfield`` method will return 3 arrays of data that can then be used to produce different plots:: - - gdas.plot_activity(activity) - gdas.plot_time_series(station,ts_list,seglist=activity) - gdas.plot_asd(station,ts_list) - gdas.plot_whitening(station,ts_list,activity) - -This is a script to do Excess Power analysis:: - - psd_segment_length = 60 - psd_segment_stride = 30 - psd_estimation = 'median-mean' - window_fraction = 0 - tile_fap = 1e-5 - channels = 250 - - gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels) - gdas.plot_triggers() - -Data extraction -=============== - -Extracting real data --------------------- - -Retrieve metadata -~~~~~~~~~~~~~~~~~ - -The first step is to define some variables related to which data we want to study and their location. The ``os.path.join`` method will join that different paths called as arguments (i.e. 
in the parenthesis):: - - # Set name of the channel to extract - setname = "MagneticFields" - # Define station name and map - station = "fribourg01" - # Define year, month and day - year,month,day = '2016','11','03' - # Define path to main data repository - path1 = '/Users/vincent/ASTRO/data/GNOMEDrive/gnome/serverdata/' - # Define path to day repository - path2 = "%s/%s/%s/%s/"%(station,year,month,day) - # Define generic hdf5 filenames - path3 = "%s_%s%s%s_*.hdf5"%(station,year,month,day) - # Define full generic path name - fullpath = os.path.join(path1,path2,path3) - -We then use the `glob `_ module to list all the files that satisfy the full path name and loop over each HDF5 file and do the following: - -- Extract its metadata using the `h5py `_ package; -- Calculate the segment in time for which the data corresponds to using the :ref:`file_to_segment ` function; -- Store each filename and metadata on two different dictionary variables ``file_order`` and ``file_order``. - -Finally, we extract the sampling rate from one of the file which will be use later in the analysis. The sampling rate is the same for all the data files:: - - # Initialising dictionary for data - file_order,data_order = {},{} - # Loop over all existing data files - for fname in glob.glob(fullpath): - # Read hdf5 file - hfile = h5py.File(fname, "r") - # Extract segment information from file - segfile = file_to_segment(hfile,setname) - # Associate file in dictionary with association to segment data - file_order[segfile] = fname - data_order[segfile] = hfile - # Retrieve sampling rate from last read file - sample_rate = hfile[setname].attrs["SamplingRate(Hz)"] - -Creating segment lists -~~~~~~~~~~~~~~~~~~~~~~ - -This section will create a continuous list of all the data segments available. We use the following modules in order to create the list properly: - -- The `segmentlist `_ module from the ``glue.segments`` library defines the list of segments. 
The =coalesce()= method is then used to put all the segments in coalesced state. -- The `DataQualityDict `_ module from the ``gwpy.segments`` library allows to store all the data segments in an ordered dictionary. -- The `DataQualityFlag `_ module from the ``gwpy.segments`` library allows to *record times during which the instrument was operating outside of its nominal condition*. - -The script is as follows:: - - # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data - seglist = segmentlist(data_order.keys()) - # Sort the segment list - seglist.sort() - # Initialise dictionary for segment information - full_seglist = DataQualityDict() - # Save time span for each segment in ASCII file - with open("segments.txt", "w") as fout: - for seg in seglist: - print >>fout, "%10.9f %10.9f" % seg - # FIXME: Active should be masked from the sanity channel - full_seglist[station] = DataQualityFlag(station,active=seglist.coalesce(),known=seglist.coalesce()) - # Define start and end time of entire dataset - start, end = full_seglist[station].active.extent() - -Establishing active times -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here's the script:: - - # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data - seglist = segmentlist(data_order.keys()) - # Sort the segment list - seglist.sort() - # Import gwpy tools - plot = SegmentPlot() - # Initialize plotting figure - ax = plot.gca() - # Plot all segment in figure - ax.plot(full_seglist) - # Save figure - pyplot.savefig("activity.png",dpi=500) - -Retrieve and concatenate the data. 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here's the script:: - - # Generate time series for the ensemble of data - data_list = generate_timeseries(file_order,setname) - # Retrieve channel data for all the segments - full_data = numpy.hstack([retrieve_channel_data(data_order[seg],setname) for seg in seglist]) - # Define log base 2 of the total time length of the full data - loglength = math.log(len(full_data)/sample_rate, 2) - # Define zero padding - zpad = math.ceil(loglength) - zpad = int(2**zpad) - len(full_data)/sample_rate - zpad = numpy.zeros(int(zpad*sample_rate / 2.0)) - # Include padding next to the data - full_data = numpy.hstack((zpad, full_data, zpad)) - # Models a time series consisting of uniformly sampled scalar values - ts_data = types.TimeSeries(full_data,delta_t=1/sample_rate,epoch=seglist[0][0]) - # Loop over all the elements in the dictionary - for v in data_order.values(): - # Close the element - v.close() - -Producing fake data -------------------- - -Create simulated time series data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is easy to create fake data, one can use the `numpy.random.normal `_ method from the Numpy library to draw random samples from a normal Gaussian distribution with mean of 0, standard deviation of 1, and a length equal to the sampling rate (``args.sample_rate``) times the length in seconds of individual segments (``args.psd_segment_length``) times the number of segment the user wish to produce. After defining the starting UTC time, one can then create a time series of the data using the `TimeSeries `_ module from the ``gwpy.timeseries`` library.:: - - print "Create fake data..." 
- start = 1153742437.0 - end = start + args.psd_segment_length * 16 - station = "gaussian-noise" - setname = "MagneticFields" - full_data = numpy.random.normal(0, 1, int(args.sample_rate * args.psd_segment_length * 16)) - ts_data = TimeSeries(full_data, sample_rate=args.sample_rate,epoch=start) - -Produce and plot fake signal -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here's the script:: - - delta_t = 1.0/args.sample_rate - filter_band = 4 - #q = math.sqrt(2)*f_0/filter_band * 2 - #f_0 = 18 - duration = 0.1 - hrss = 0.0275 - #hp, hx = SimBurstSineGaussian(q * 2, f_0, hrss, 1, 0, data_dt) - hp, hx = SimBurstGaussian(duration, hrss, delta_t) - hp = TimeSeries.from_lal(hp) - hx = TimeSeries.from_lal(hx) - # We rescale the amplitude to hide or expose it in the data a bit better - hp *= 100. - - pyplot.figure() - pyplot.plot(hp.times, hp, 'k-') - pyplot.xlim([-0.5, 0.5]) - pyplot.ylim([-0.1, 0.1]); - pyplot.xlabel('Time (s)') - pyplot.ylabel('Magnitude') - pyplot.savefig('fakesignal.png') - pyplot.close() - -Inject fake signal into artificial data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here's the script:: - - random_time = int((start+end)/2.) 
- st = (random_time-start)*args.sample_rate - len(hp)/2 - en = st + len(hp) - hp.epoch = random_time - ts_data[st:en] += hp - data_list = [ts_data] - ts_data = types.TimeSeries(ts_data.value,delta_t=1.0/args.sample_rate,epoch=start) - -Plotting Data -============= - -Generate a plot of the data time series ---------------------------------------- - -Here's the script:: - - # Include time series element in dictionary - plot = TimeSeriesPlot() - # Create axis in plot - ax = plot.gca() - # Loop over all the time series - for ts in data_list: - # Plot time series for each segment - ax.plot(ts, color='blue') - # Display title - ax.set_title(station) - # Plot activity segments - plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor': 'g','edgecolor': 'k'}) - # Define edges of the x axis - ax.set_xlim(start, end) - # Save figure - plot.savefig('time_series.png',dpi=500) - -Create sound based on the data ------------------------------- - -Here's the script:: - - wout = wave.open("pure_tone.wav", "w") - wout.setnchannels(1) # mono - wout.setsampwidth(4) # 32 bit audio - wout.setframerate(1000) - wout.writeframes(ts[:]) - wout.close() - -Invoking precision issues -------------------------- - -AGG complexity starts to complain with large numbers of points and we somehow invoke precision issues that need to be ameliorated:: - - for d in data_list: - d.x0 = Quantity(int(d.x0.value * 500), d.xunit) - d.dx = Quantity(1, d.xunit) - data_list.coalesce() - for d in data_list: - d.x0 = Quantity(d.x0.value / 500, d.xunit) - d.dx = Quantity(0.002, d.xunit) - -Amplitude Spectral Density (ASD) --------------------------------- - -Here's the script:: - - # Initialize plotting functionality - plot = SpectrumPlot() - # Loop over all the time series - for d in data_list: - # Generate 8 seconds per FFT with 4 second (50%) overlap - spectrum = d.asd(8, 4) - # Create plotting axis - ax = plot.gca() - # Plot square root of the spectrum - 
ax.plot(numpy.sqrt(spectrum)) - # Set x axis to log scale - ax.set_xscale('log') - # Set y axis to log scale - ax.set_yscale('log') - # Set x axis limits - ax.set_xlim(1e-1, 500) - # Save figure - plot.savefig("asd.png",dpi=500) - -(Un)normalized Spectrograms ---------------------------- - -The first thing to do is to initialise the plotting axis for both figure as well as some display settings specific to spectrogram and which can be loaded using the `SpectrogramPlot() `_ module from the ``gwpy.plotter`` library:: - - plot = SpectrogramPlot() - ax = plot.gca() - white_plot = SpectrogramPlot() - wax = white_plot.gca() - -The spectrogram is then created using the `spectrogram `_ function from the ``gwpy.timeseries.TimeSeries`` package. This will *calculate the average power spectrogram of this TimeSeries using the specified average spectrum method* (default being the Welch's method). We define the 3 following variables that will be used to construct the spectrogram: - -- ``stride``: number of seconds in single PSD (column of spectrogram), default 20; -- ``fftlength``: number of seconds in single FFT, default 6; -- ``overlap``: number of seconds between FFTs, default 3. - -We can then loop over all the time series made from each loaded HDF5 data file, and construct the spectrogram for each time series. The whitening of the spectrogram is then done by normalisation it, which can be performed using the `ratio `_ method from the ``gwpy.spectrogram.Spectrogram`` library. This will calculate the ratio of the created spectrogram against a specific reference, here we chose the reference to be the median of each spectrum in the given spectrogram: - -.. 
math:: - \sqrt{S(f,t)}/\sqrt{\overline{S(f)}} - -The script is as follows:: - - for ts in data_list: - if (len(ts) * ts.dt).value < stride: - continue - spec = ts.spectrogram(stride, fftlength=fftlength, overlap=overlap) - ax.plot(spec) - wspec = spec.ratio('median') - wax.plot(wspec, vmin=0.1, vmax=100) - -Finally, the plot can be completed by including the activity period below each figure:: - - ax.set_title(station) - ax.set_xlim(seglist[0][0], seglist[-1][1]) - ax.set_ylim(1e-1, 500) - ax.set_yscale('log') - plot.add_colorbar(log=True) - plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'}) - plot.savefig("spectrogram.png",dpi=500) - - wax.set_title(station) - wax.set_xlim(seglist[0][0], seglist[-1][1]) - wax.set_ylim(1e-1, 500) - wax.set_yscale('log') - white_plot.add_colorbar(log=True) - white_plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'}) - white_plot.savefig("whitened_spectrogram.png",dpi=500) - -Excess-Power algorithm -====================== - -General overview ----------------- - -The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search of Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (200) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. (2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. - -We present below a step-by-step procedure followed during the Excess Power search analysis. 
For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. - -- :ref:`Time domain segmentation and PSD estimate ` - - We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. - -- :ref:`Comb of frequency channels ` - - We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. - -- :ref:`Creating analysing blocks ` - - The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel. 
- -- :ref:`Creating tiles with different bandwidth ` - - We can now construct tiles with different bandwidth by summing multiple channels together. - -- :ref:`Exploring tiles with different duration ` - - For each given tile's bandwidth, one can investigate different tile's duration. This can be done by exploring different number of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the :math:`d`, one can explore multiple tile's duration for different bandwidth. - -- :ref:`Define triggering signal ` - - The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. - -.. figure:: ./img/overview.png - - Overview of the Excess Power method and difference between segments, channels, tiles and blocks. - -.. _psdestimate: - -Estimate Power Spectral Density (PSD) -------------------------------------- - -The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. 
The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values. :: - - # Convert time series as array of float - data = ts_data.astype(numpy.float64) - -The PSD is calculated by splitting up the signal into overlapping segments and scan through each segment to calculate individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: - - # Average method to measure PSD from the data - avg_method = args.psd_estimation - -One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in second units with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into sample unit. :: - - # The segment length for PSD estimation in samples - seg_len = int(args.psd_segment_length * args.sample_rate) - # The separation between consecutive segments in samples - seg_stride = int(args.psd_segment_stride * args.sample_rate) - -We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce invidual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, and return the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin. 
:: - - # Lifted from the psd.from_cli module - fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) - # Plot the power spectral density - plot_spectrum(fd_psd) - # We need this for the SWIG functions - lal_psd = fd_psd.lal() - -One can display the power measurements, frequency array and frequency between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: - - print 'Display power measurements of the first 10 frequency bins' - print fd_psd[:10] - print 'Display central frequency of the first 10 bins' - print fd_psd.sample_frequencies[:10] - print 'Display the frequency separation between bins' - print fd_psd.delta_f - -:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. highest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate. - -Checking filtering settings ---------------------------- - -The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only let through frequencies that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: - - if args.min_frequency < args.strain_high_pass: - print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) - -In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as signals above that frequency cannot be resolved in the sampled data. 
:: - - if args.max_frequency is None: - args.max_frequency = args.sample_rate / 2.0 - -If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args ` or the number of frequency channels (=--channels= argument) is not defined but the total spectral band is (``data_band``), one can then determined all the filter settings as follows: :: - - - if args.tile_bandwidth is None and args.channels is None: - # Exit program with error message - exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") - else: - # Define as assert statement that tile maximum frequency larger than its minimum frequency - assert args.max_frequency >= args.min_frequency - # Define spectral band of data - data_band = args.max_frequency - args.min_frequency - # Check if tile bandwidth or channel is defined - if args.tile_bandwidth is not None: - # Define number of possible filter bands - nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 - elif args.channels is not None: - # Define filter bandwidth - band = args.tile_bandwidth = data_band / (args.channels + 1) - assert args.channels > 1 - -The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: - - # Lowest frequency of the first filter - flow = args.min_frequency - -Whitening window and spectral correlation ------------------------------------------ - -This part determines how much data on either side of the tukey window is to be discarded. Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8, smaller windows will have fractions of integers, but larger powers of two will still preseve this (probably not a big deal in the end). 
:: - - window_fraction = 0 - -The two point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: - - # Do two point spectral correlation - window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) - window = window.data.data - window_sigma_sq = numpy.mean(window**2) - # Pre scale the window by its root mean squared -- see eqn 11 of EP document - #window /= numpy.sqrt(window_sigma_sq) - -.. _filterbank: - -Computing the filter bank -------------------------- - -The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to final frequency of the last channel (i.e. band*nchans) in increments equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM). :: - - # Define filters - filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) - -This function returns 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``). 
The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as followed: :: - - # Print data of first channel's filter - print filter_bank[0].data.data - # Print frequency separation between 2 values in the first channel's filter - print filter_bank[0].deltaF - # Print first frequency of the first channel's filter - print filter_bank[0].f0 - # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) - print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF - -Further in the analysis, the following filters will used: -1. ``white_filter_ip``: Whitened filter inner products computed with themselves. -2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. -3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. -4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. - -:: - - # This is necessary to compute the mu^2 normalizations - white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) - unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) - # These two are needed for the unwhitened mean square sum (hrss) - white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) - unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) - -Normalization of virtual channel --------------------------------- - -The virtual channels will be used during the excesspower analysis to explore different frequency ranges around each PSD segments and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function. 
:: - - # Initialise dictionary - mu_sq_dict = {} - # nc_sum additional channel adds - for nc_sum in range(0, int(math.log(nchans, 2))): - min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 - print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) - nc_sum = 2**nc_sum - 1 - mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) - -Initialise event list and determine stride boundaries ------------------------------------------------------ - -First of all, we create a table similar than the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such table is commonly know as ``lsctables``. A pre-defined LSC table can be constructed using ``New`` function from the `glue.ligolw.lsctables `_ module. We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record. :: - - # Create event list for single burst table - event_list = lsctables.New(lsctables.SnglBurstTable, - ['start_time','start_time_ns','peak_time','peak_time_ns', - 'duration','bandwidth','central_freq','chisq_dof', - 'confidence','snr','amplitude','channel','ifo', - 'process_id','event_id','search','stop_time','stop_time_ns']) - -We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in sample unit for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in sample unit is determined and added the both starting and ending time indexes. 
:: - - # Determine boundaries of stride in time domain - t_idx_min, t_idx_max = 0, seg_len - # Check if user requested starting time is defined - if args.analysis_start_time is not None: - # Define the time difference in seconds between data and user requested starting times - t_idx_off = args.analysis_start_time - ts_data.start_time - # Calculate the index of the user requested starting point in the data - t_idx_off = int(t_idx_off * args.sample_rate) - else: - # Define index of the starting point as first value in data - t_idx_off = 0 - # Initialise minimum index values as offset starting index - t_idx_min += t_idx_off - # Initialise maximum index values as offset starting index - t_idx_max += t_idx_off - -Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ``ts_data``. :: - - # Check if user requested end time is defined - if args.analysis_end_time is not None: - # Define the time difference between data and user requested ending times - t_idx_max_off = args.analysis_end_time - ts_data.start_time - # Calculate the index of the user requested starting point in the data - t_idx_max_off = int(t_idx_max_off * args.sample_rate) - else: - # Define index of the ending point as the length of data array - t_idx_max_off = len(ts_data) - -.. _analysingblocks: - -Define analysing blocks ------------------------ - -The first thing we do is to calculate the time series for the segment that is covered (``tmp_ts_data``) and redefined the metadata, especially the time of the first sample in seconds which is defined by the ``epoch`` argument and is different for every segment. After plotting the time series for that segment, the data are then converted into frequency series (``fs_data``) using the `to_frequencyseries `_ module from the ``pycbc.types.timeseries.TimeSeries`` library. Finally, the frequency data are then whitened. 
:: - - # Loop over each data within the user requested time period - while t_idx_max <= t_idx_max_off: - # Define starting and ending time of the segment in seconds - start_time = ts_data.start_time + t_idx_min/float(args.sample_rate) - end_time = ts_data.start_time + t_idx_max/float(args.sample_rate) - print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off)) - # Model a withen time series for the block - tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time) - # Save time series in segment repository - segfolder = 'segments/%i-%i'%(start_time,end_time) - os.system('mkdir -p '+segfolder) - plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder)) - # Convert times series to frequency series - fs_data = tmp_ts_data.to_frequencyseries() - print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2 - # Whitening (FIXME: Whiten the filters, not the data) - fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f) - print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2 - -Create time-frequency map for each block ----------------------------------------- - -We initialise a 2D zero array for a time-frequency map (``tf_map``) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the ``nchans`` variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the ``seg_len`` variable. 
:: - - # Initialise 2D zero array for time-frequency map - tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128) - -We also initialise a zero vector for a temporary filter bank (``tmp_filter_bank``) that will store, for a given channel, the filter's values from the original filter bank (``filter_bank``) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (``fd_psd``). :: - - # Initialise 1D zero array - tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128) - -We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel's filter from the original filter bank. :: - - # Loop over all the channels - print tprint(t0,t1),"Filtering all %d channels..." % nchans - for i in range(nchans): - # Reset filter bank series - tmp_filter_bank *= 0.0 - # Index of starting frequency - f1 = int(filter_bank[i].f0/fd_psd.delta_f) - # Index of ending frequency - f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1 - # (FIXME: Why is there a factor of 2 here?) - tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2 - -We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel. :: - - # Define the template to filter the frequency series with - template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False) - -Finally, we use the `matched_filter_core `_ module from the ``pycbc.filter.matchedfilter`` library to filter the frequency series from the channel. 
This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector. :: - - # Create filtered series - filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None, - low_frequency_cutoff=filter_bank[i].f0, - high_frequency_cutoff=filter_bank[i].f0+2*band) - -The `matched filter `_ is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed. :: - - # Include filtered series in the map - tf_map[i,:] = filtered_series[0].numpy() - -The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of sample present in one segment of data, i.e. segment's length in seconds times the the sampling rate. The map can finally be plotted with a :math:`\Delta t` corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and :math:`\Delta f` is equal to the bandwidth of one channel. :: - - plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder)) - -.. _tilebandwidth: - -Constructing tiles of different bandwidth ------------------------------------------ - -First and foremost, we define a clipping region in the data to be used to remove window corruption, this is non-zero if the ``window_fraction`` variable is set to a non-zero value. :: - - print tprint(t0,t1),"Beginning tile construction..." - # Clip the boundaries to remove window corruption - clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2) - -In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. 
We first need to setup a loop such that the maximum number of additional channel is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (``nc_sum``) would therefore be equal to 2 to the power of the current quantity of additional channels. :: - - for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds - nc_sum = 2**nc_sum - 1 - print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1) - -The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to ``band * (nc_sum + 1)``. :: - - us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t))) - print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate) - -"Virtual" wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time frequency map for this tile using the :ref:`make_indp_tiles ` internal function. In other word, we are constructing multiple sub-tiles for which we can determined the respective energy in the given frequency band. :: - - mu_sq = mu_sq_dict[nc_sum] - sys.stderr.write("\t...calculating tiles...") - if clip_samples > 0: - tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq) - else: - tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq) - sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape) - print >>sys.stderr, " done" - print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles)) - -.. _tileduration: - -Explore multiple tile durations -------------------------------- - -Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. 
We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the =--max-duration= argument. If not, the value will be set to 32. :: - - if args.max_duration is not None: - max_dof = 2 * args.max_duration * (band * (nc_sum+1)) - else: - max_dof = 32 - assert max_dof >= 2 - -Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth. :: - - print "\t\t...getting longer durations..." - #for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]: - for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]: - sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j)) - #tlen = tiles.shape[1] - j + 1 - tlen = tiles.shape[1] - 2*j + 1 + 1 - if tlen <= 0: - print >>sys.stderr, " ...not enough samples." - continue - dof_tiles = numpy.zeros((tiles.shape[0], tlen)) - #:sum_filter = numpy.ones(j) - # FIXME: This is the correct filter for 50% overlap - sum_filter = numpy.array([1,0] * (j-1) + [1]) - #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1]) - for f in range(tiles.shape[0]): - # Sum and drop correlate tiles - # FIXME: don't drop correlated tiles - #output = numpy.convolve(tiles[f,:], sum_filter, 'valid') - dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid') - print >>sys.stderr, " done" - print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles)) - level_tdiff = time.time() - tdiff - print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff - -Finally, the bandwidth and duration of the tile can be defined as followed: :: - - # Current bandwidth of the time-frequency map tiles - current_band = band * (nc_sum + 1) - # How much each "step" is in the frequency domain -- almost - # assuredly the fundamental bandwidth - df = current_band - # How much each "step" is in the time domain -- under sampling rate - # FIXME: THis won't work if the sample rate isn't a power of 2 - dt = 1.0 / 2 / (2 * 
current_band) * 2 - full_band = 250 - dt = current_band / full_band * ts_data.sample_rate - dt = 1.0/dt - # Duration is fixed by the NDOF and bandwidth - duration = j / 2.0 / current_band - -.. _triggerfinding: - -Trigger finding +Getting Started --------------- -In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such threshold can be determined by using the /inverse survival function/ method from the `scipy.stats.chi2 `_ package. :: - - threshold = scipy.stats.chi2.isf(args.tile_fap, j) - print "Threshold for this level: %f" % threshold - #if numpy.any(dof_tiles > threshold): - #plot_spectrogram(dof_tiles.T) - #import pdb; pdb.set_trace() - -Once the threshold is set, one can then run the :ref:`trigger_list_from_map ` function to quickly find the trigger signal from the ``dof_tiles`` array that :: - - # Since we clip the data, the start time needs to be adjusted accordingly - window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2 - trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None) - for event in event_list[::-1]: - if event.amplitude != None: - continue - etime_min_idx = float(event.get_start()) - float(fs_data.epoch) - etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t) - etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration - etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t) - # (band / 2) to account for sin^2 wings from finest filters - flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band) - fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band) - # TODO: Check that the undersampling rate is always commensurate - # with the indexing: that is to say that - # mod(etime_min_idx, us_rate) == 0 always - z_j_b = 
tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate] - # FIXME: Deal with negative hrss^2 -- e.g. remove the event - try: - event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof) - except ValueError: - event.amplitude = 0 - - print "Total number of events: %d" % len(event_list) - -Switch to new block -------------------- - -The following will move the frequency band to the next segment: :: - - tdiff = time.time() - tdiff - print "Done with this block: total %f" % tdiff - - t_idx_min += int(seg_len * (1 - window_fraction)) - t_idx_max += int(seg_len * (1 - window_fraction)) - -Extracting GPS time range -------------------------- - -We use the `LIGOTimeGPS `_ structure from the =glue.lal= package to /store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference/. Once both times are defined, the range of value is stored in a semi-open interval using the `segment `_ module from the =glue.segments= package. 
:: - - # Starting epoch relative to GPS starting epoch - start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time) - # Ending epoch relative to GPS ending epoch - end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time) - # Represent the range of values in the semi-open interval - inseg = segment(start_time,end_time) - -Prepare output file for given time range ----------------------------------------- - -:: - - xmldoc = ligolw.Document() - xmldoc.appendChild(ligolw.LIGO_LW()) - - ifo = args.channel_name.split(":")[0] - proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date) - - # Figure out the data we actually analyzed - outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction) - - ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg) - - for sb in event_list: - sb.process_id = proc_row.process_id - sb.search = proc_row.program - #sb.ifo, sb.channel = args.channel_name.split(":") - sb.ifo, sb.channel = station, setname - - xmldoc.childNodes[0].appendChild(event_list) - fname = make_filename(station, inseg) - - utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True) - -Plot trigger results --------------------- - -:: - - events = SnglBurstTable.read(fname+'.gz') - #del events[10000:] - plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr") - #plot = events.plot('time', 'central_freq', color='snr') - #plot.set_yscale("log") - plot.set_ylim(1e-0, 250) - t0 = 1153742417 - plot.set_xlim(t0 + 0*60, t0 + 1*60) - #plot.set_xlim(t0 + 28, t0 + 32) - pyplot.axvline(t0 + 30, color='r') - cb = plot.add_colorbar(cmap='viridis') - plot.savefig("triggers.png") - -Module Access -============= - -Extract Magnetic Field Data ---------------------------- - -Extract magnetic field data from HDF5 files. - -.. 
currentmodule:: gdas.retrieve - -.. autosummary:: - :toctree: generated/ - - magfield - file_to_segment - construct_utc_from_metadata - generate_timeseries - create_activity_list - retrieve_data_timeseries - retrieve_channel_data - -Plotting routines ------------------ - -Methods to produce time-frequency plots and others - -.. currentmodule:: gdas.plots - -.. autosummary:: - :toctree: generated/ +.. toctree:: + :maxdepth: 2 - plot_activity - plot_time_series - plot_asd - plot_whitening - plot_ts - plot_spectrum - plot_spectrogram - plot_spectrogram_from_ts - plot_triggers + installation + server + example Excess Power Search Analysis ----------------------------- - -Main class to do excess-power search analysis - -.. currentmodule:: gdas.epower - -.. autosummary:: - :toctree: generated/ +------------------- - excess_power - check_filtering_settings - calculate_psd - calculate_spectral_correlation - create_filter_bank - convert_to_time_domain - identify_block - create_tf_plane - compute_filter_ips_self - compute_filter_ips_adjacent - compute_channel_renormalization - measure_hrss - measure_hrss_slowly - measure_hrss_poorly - trigger_list_from_map - determine_output_segment - make_tiles - make_indp_tiles - make_filename - construct_tiles - create_tile_duration - create_xml +.. toctree:: + :maxdepth: 1 + + epower_overview + epower_step1_checkfilt + epower_step2_psd + epower_step3_speccor + epower_step4_filterbank + epower_step5_normalization + epower_step6_initialization -Utilities ---------- - -Independent routines to do various other things - -.. currentmodule:: gdas.utils - -.. autosummary:: - :toctree: generated/ - - create_sound - - -.. _file_to_segment: - -.. ** Extract segment information -.. -.. The starting and ending UTC times for a specific HDF5 file are determined by using the =Date=, =t0= and =t1= attributes from the metadata. The [[construct_utc_from_metadata][=construct_utc_from_metadata=]] function is then used to calculate the UTC time. 
Finally, the [[http://software.ligo.org/docs/glue/glue.__segments.segment-class.html][=segment=]] module from the =glue.segments= library is used to represent the range of times in a semi-open interval. -.. -.. #+BEGIN_SRC python -.. def file_to_segment(hfile,segname): -.. # Extract all atributes from the data -.. attrs = hfile[segname].attrs -.. # Define each attribute -.. dstr, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"] -.. # Construct GPS starting time from data -.. start_utc = construct_utc_from_metadata(dstr, t0) -.. # Construct GPS starting time from data -.. end_utc = construct_utc_from_metadata(dstr, t1) -.. # Represent the range of times in the semi-open interval -.. return segment(start_utc,end_utc) -.. #+END_SRC -.. -.. ** Constructing UTC from metadata -.. <> -.. -.. #+BEGIN_SRC python -.. def construct_utc_from_metadata(datestr, t0str): -.. instr = "%d-%d-%02dT" % tuple(map(int, datestr.split('/'))) -.. instr += t0str -.. t = Time(instr, format='isot', scale='utc') -.. return t.gps -.. #+END_SRC -.. -.. ** Generate time series -.. <> -.. -.. #+BEGIN_SRC python -.. def generate_timeseries(data_list, setname="MagneticFields"): -.. full_data = TimeSeriesList() -.. for seg in sorted(data_list): -.. hfile = h5py.File(data_list[seg], "r") -.. full_data.append(retrieve_data_timeseries(hfile, "MagneticFields")) -.. hfile.close() -.. return full_data -.. #+END_SRC -.. -.. ** Retrieve data time series -.. <> -.. -.. #+BEGIN_SRC python -.. def retrieve_data_timeseries(hfile, setname): -.. dset = hfile[setname] -.. sample_rate = dset.attrs["SamplingRate(Hz)"] -.. gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"]) -.. data = retrieve_channel_data(hfile, setname) -.. ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch) -.. return ts_data -.. #+END_SRC -.. -.. ** Retrieve channel data -.. <> -.. -.. #+BEGIN_SRC python -.. def retrieve_channel_data(hfile, setname): -.. return hfile[setname][:] -.. #+END_SRC -.. -.. .. 
_calculate_spectral_correlation: -.. -.. ** Two point spectral correlation -.. -.. For our data, we apply a Tukey window whose flat bit corresponds to =window_fraction= (in percentage) of the segment length (in samples) used for PSD estimation (i.e. =fft_window_len=). This can be done by using the [[http://software.ligo.org/docs/lalsuite/lal/_window_8c_source.html#l00597][=CreateTukeyREAL8Window=]] module from the =lal= library. -.. -.. #+BEGIN_SRC python -.. def calculate_spectral_correlation(fft_window_len, wtype='hann', window_fraction=None): -.. if wtype == 'hann': -.. window = lal.CreateHannREAL8Window(fft_window_len) -.. elif wtype == 'tukey': -.. window = lal.CreateTukeyREAL8Window(fft_window_len, window_fraction) -.. else: -.. raise ValueError("Can't handle window type %s" % wtype) -.. #+END_SRC -.. -.. Once the window is built, a new frequency plan is created which will help performing a [[http://fourier.eng.hmc.edu/e101/lectures/fourier_transform_d/node1.html][forward transform]] on the data. This is done with the [[http://software.ligo.org/docs/lalsuite/lal/group___real_f_f_t__h.html#gac4413752db2d19cbe48742e922670af4][=CreateForwardREAL8FFTPlan=]] module which takes as argument the total number of points in the real data and the measurement level for plan creation (here 1 stands for measuring the best plan). -.. -.. #+BEGIN_SRC python -.. fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1) -.. #+END_SRC -.. -.. We can finally compute and return the two-point spectral correlation function for the whitened frequency series (=fft_plan=) from the window applied to the original time series using the [[http://software.ligo.org/docs/lalsuite/lal/group___time_freq_f_f_t__h.html#ga2bd5c4258eff57cc80103d2ed489e076][=REAL8WindowTwoPointSpectralCorrelation=]] module. -.. -.. #+BEGIN_SRC python -.. return window, lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan) -.. #+END_SRC -.. -.. ** Create filter bank -.. <> -.. -.. 
The construction of a filter bank is fairly simple. For each channel, a frequency domain channel filter function will be created using the [[http://software.ligo.org/docs/lalsuite/lalburst/group___e_p_search__h.html#ga899990cbd45111ba907772650c265ec9][=CreateExcessPowerFilter=]] module from the =lalburst= package. Each channel filter is divided by the square root of the PSD frequency series prior to normalization, which has the effect of de-emphasizing frequency bins with high noise content, and is called "over whitening". The data and metadata are finally stored in the =filter_fseries= and =filter_bank= arrays respectively. Finally, we store on a final array, called =np_filters= the all time-series generated from each filter so that we can plot them afterwards -.. -.. #+BEGIN_SRC python -.. def create_filter_bank(delta_f, flow, band, nchan, psd, spec_corr): -.. lal_psd = psd.lal() -.. lal_filters, np_filters = [],[] -.. for i in range(nchan): -.. lal_filter = lalburst.CreateExcessPowerFilter(flow + i*band, band, lal_psd, spec_corr) -.. np_filters.append(Spectrum.from_lal(lal_filter)) -.. lal_filters.append(lal_filter) -.. return filter_fseries, lal_filters, np_filters -.. #+END_SRC -.. -.. ** Compute filter inner products with themselves -.. <> -.. #+BEGIN_SRC python -.. def compute_filter_ips_self(lal_filters, spec_corr, psd=None): -.. """ -.. Compute a set of inner products of input filters with themselves. If psd -.. argument is given, the unwhitened filter inner products will be returned. -.. """ -.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, psd) for f in lal_filters]) -.. #+END_SRC -.. -.. ** Compute filter inner products with adjecant filters -.. <> -.. -.. #+BEGIN_SRC python -.. def compute_filter_ips_adjacent(lal_filters, spec_corr, psd=None): -.. """ -.. Compute a set of filter inner products between input adjacent filters. -.. If psd argument is given, the unwhitened filter inner products will be -.. returned. 
The returned array index is the inner product between the -.. lal_filter of the same index, and its (array) adjacent filter --- assumed -.. to be the frequency adjacent filter. -.. """ -.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, psd) for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])]) -.. #+END_SRC -.. -.. .. _compute_channel_renomalization: -.. -.. Compute channel renormalization -.. ------------------------------- -.. -.. Compute the renormalization for the base filters up to a given bandwidth. -.. -.. #+BEGIN_SRC python -.. def compute_channel_renomalization(nc_sum, lal_filters, spec_corr, nchans, verbose=True): -.. mu_sq = (nc_sum+1)*numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None) for f in lal_filters]) -.. # Uncomment to get all possible frequency renormalizations -.. #for n in xrange(nc_sum, nchans): # channel position index -.. for n in xrange(nc_sum, nchans, nc_sum+1): # channel position index -.. for k in xrange(0, nc_sum): # channel sum index -.. # FIXME: We've precomputed this, so use it instead -.. mu_sq[n] += 2*lalburst.ExcessPowerFilterInnerProduct(lal_filters[n-k], lal_filters[n-1-k], spec_corr, None) -.. #print mu_sq[nc_sum::nc_sum+1] -.. return mu_sq -.. #+END_SRC -.. -.. ** Measure root-sum-square strain (hrss) -.. <> -.. -.. #+BEGIN_SRC python -.. def measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, w_ss_ij, delta_f, delta_t, filter_len, dof): -.. """ -.. Approximation of unwhitened sum of squares signal energy in a given EP tile. -.. See T1200125 for equation number reference. -.. z_j_b - time frequency map block which the constructed tile covers -.. uw_ss_ii - unwhitened filter inner products -.. uw_ss_ij - unwhitened adjacent filter inner products -.. w_ss_ij - whitened adjacent filter inner products -.. delta_f - frequency binning of EP filters -.. delta_t - native time resolution of the time frequency map -.. filter_len - number of samples in a fitler -.. 
dof - degrees of freedom in the tile (twice the time-frequency area) -.. """ -.. s_j_b_avg = uw_ss_ii * delta_f / 2 -.. # unwhitened sum of squares of wide virtual filter -.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() -.. s_j_nb_avg *= delta_f -.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \ -.. numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) -.. # eqn. 62 -.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom -.. # eqn. 63 -- approximation of unwhitened signal energy time series -.. # FIXME: The sum in this equation is over nothing, but indexed by frequency -.. # I'll make that assumption here too. -.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) -.. s_j_nb *= numpy.sqrt(uw_ups_ratio / filter_len * 2) -.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution -.. # FIXME: correct axis of summation? -.. return math.sqrt(numpy.sum(numpy.absolute(s_j_nb)**2) * delta_t - s_j_nb_avg * dof * delta_t) -.. #+END_SRC -.. -.. ** Unwhitened inner products filtering -.. <> -.. -.. #+BEGIN_SRC python -.. # < s^2_j(f_1, b) > = 1 / 2 / N * \delta_t EPIP{\Theta, \Theta; P} -.. def uw_sum_sq(filter1, filter2, spec_corr, psd): -.. return lalburst.ExcessPowerFilterInnerProduct(filter1, filter2, spec_corr, psd) -.. #+END_SRC -.. -.. ** Unwhitened sum of squares signal -.. <> -.. -.. #+BEGIN_SRC python -.. def measure_hrss_slowly(z_j_b, lal_filters, spec_corr, psd, delta_t, dof): -.. """ -.. Approximation of unwhitened sum of squares signal energy in a given EP tile. -.. See T1200125 for equation number reference. NOTE: This function is deprecated -.. in favor of measure_hrss, since it requires recomputation of many inner products, -.. making it particularly slow. -.. """ -.. # FIXME: Make sure you sum in time correctly -.. # Number of finest bands in given tile -.. nb = len(z_j_b) -.. # eqn. 56 -- unwhitened mean square of filter with itself -.. 
uw_ss_ii = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i], spec_corr, psd) for i in range(nb)]) -.. s_j_b_avg = uw_ss_ii * lal_filters[0].deltaF / 2 -.. # eqn. 57 -- unwhitened mean square of filter with adjacent filter -.. uw_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, psd) for i in range(nb-1)]) -.. # unwhitened sum of squares of wide virtual filter -.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() -.. s_j_nb_avg *= lal_filters[0].deltaF -.. -.. # eqn. 61 -.. w_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, None) for i in range(nb-1)]) -.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / len(lal_filters[0].data.data) * \ -.. (numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij).sum() -.. -.. # eqn. 62 -.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom -.. -.. # eqn. 63 -- approximation of unwhitened signal energy time series -.. # FIXME: The sum in this equation is over nothing, but indexed by frequency -.. # I'll make that assumption here too. -.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) -.. s_j_nb *= numpy.sqrt(uw_ups_ratio / len(lal_filters[0].data.data) * 2) -.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution -.. # FIXME: correct axis of summation? -.. return math.sqrt((numpy.absolute(s_j_nb)**2).sum() * delta_t - s_j_nb_avg * dof * delta_t) -.. #+END_SRC -.. -.. ** Measure root-mean square strain poorly -.. <> -.. -.. #+BEGIN_SRC python -.. def measure_hrss_poorly(tile_energy, sub_psd): -.. return math.sqrt(tile_energy / numpy.average(1.0 / sub_psd) / 2) -.. #+END_SRC -.. -.. ** List triggers from map -.. <> -.. -.. #+BEGIN_SRC python -.. def trigger_list_from_map(tfmap, event_list, threshold, start_time, start_freq, duration, band, df, dt, psd=None): -.. -.. # FIXME: If we don't convert this the calculation takes forever --- but we should convert it once and handle deltaF better later -.. if psd is not None: -.. npy_psd = psd.numpy() -.. -.. 
start_time = LIGOTimeGPS(float(start_time)) -.. ndof = 2 * duration * band -.. -.. spanf, spant = tfmap.shape[0] * df, tfmap.shape[1] * dt -.. print "Processing %.2fx%.2f time-frequency map." % (spant, spanf) -.. -.. for i, j in zip(*numpy.where(tfmap > threshold)): -.. event = event_list.RowType() -.. -.. # The points are summed forward in time and thus a `summed point' is the -.. # sum of the previous N points. If this point is above threshold, it -.. # corresponds to a tile which spans the previous N points. However, th -.. # 0th point (due to the convolution specifier 'valid') is actually -.. # already a duration from the start time. All of this means, the + -.. # duration and the - duration cancels, and the tile 'start' is, by -.. # definition, the start of the time frequency map if j = 0 -.. # FIXME: I think this needs a + dt/2 to center the tile properly -.. event.set_start(start_time + float(j * dt)) -.. event.set_stop(start_time + float(j * dt) + duration) -.. event.set_peak(event.get_start() + duration / 2) -.. event.central_freq = start_freq + i * df + 0.5 * band -.. -.. event.duration = duration -.. event.bandwidth = band -.. event.chisq_dof = ndof -.. -.. event.snr = math.sqrt(tfmap[i,j] / event.chisq_dof - 1) -.. # FIXME: Magic number 0.62 should be determine empircally -.. event.confidence = -lal.LogChisqCCDF(event.snr * 0.62, event.chisq_dof * 0.62) -.. if psd is not None: -.. # NOTE: I think the pycbc PSDs always start at 0 Hz --- check -.. psd_idx_min = int((event.central_freq - event.bandwidth / 2) / psd.delta_f) -.. psd_idx_max = int((event.central_freq + event.bandwidth / 2) / psd.delta_f) -.. -.. # FIXME: heuristically this works better with E - D -- it's all -.. # going away with the better h_rss calculation soon anyway -.. event.amplitude = measure_hrss_poorly(tfmap[i,j] - event.chisq_dof, npy_psd[psd_idx_min:psd_idx_max]) -.. else: -.. event.amplitude = None -.. -.. event.process_id = None -.. event.event_id = event_list.get_next_id() -.. 
event_list.append(event) -.. #+END_SRC -.. -.. ** Determine output segment -.. <> -.. -.. #+BEGIN_SRC python -.. def determine_output_segment(inseg, dt_stride, sample_rate, window_fraction=0.0): -.. """ -.. Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window. -.. -.. If window_fration is set to 0 (default), assume no windowing. -.. """ -.. # Amount to overlap successive blocks so as not to lose data -.. window_overlap_samples = window_fraction * sample_rate -.. outseg = inseg.contract(window_fraction * dt_stride / 2) -.. -.. # With a given dt_stride, we cannot process the remainder of this data -.. remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction)) -.. # ...so make an accounting of it -.. outseg = segment(outseg[0], outseg[1] - remainder) -.. return outseg -.. #+END_SRC -.. -.. ** Make tiles -.. <> -.. -.. #+BEGIN_SRC python -.. def make_tiles(tf_map, nc_sum, mu_sq): -.. tiles = numpy.zeros(tf_map.shape) -.. sum_filter = numpy.ones(nc_sum+1) -.. # Here's the deal: we're going to keep only the valid output and -.. # it's *always* going to exist in the lowest available indices -.. for t in xrange(tf_map.shape[1]): -.. # Sum and drop correlate tiles -.. # FIXME: don't drop correlated tiles -.. output = numpy.convolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] -.. #output = fftconvolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] -.. tiles[:len(output),t] = numpy.absolute(output) / math.sqrt(2) -.. return tiles[:len(output)]**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) -.. #+END_SRC -.. -.. ** Create a time frequency map -.. <> -.. -.. In this function, we create a time frequency map with resolution similar than =tf_map= but rescale by a factor of =nc_sum= + 1. All tiles will be independent up to overlap from the original tiling. 
The =mu_sq= is applied to the resulting addition to normalize the outputs to be zero-mean unit-variance Gaussian variables (if the input is Gaussian). -.. -.. #+BEGIN_SRC python -.. def make_indp_tiles(tf_map, nc_sum, mu_sq): -.. tiles = tf_map.copy() -.. # Here's the deal: we're going to keep only the valid output and -.. # it's *always* going to exist in the lowest available indices -.. stride = nc_sum + 1 -.. for i in xrange(tiles.shape[0]/stride): -.. numpy.absolute(tiles[stride*i:stride*(i+1)].sum(axis=0), tiles[stride*(i+1)-1]) -.. return tiles[nc_sum::nc_sum+1].real**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) -.. #+END_SRC -.. -.. ** Create output filename -.. <> -.. -.. #+BEGIN_SRC python -.. def make_filename(ifo, seg, tag="excesspower", ext="xml.gz"): -.. if isinstance(ifo, str): -.. ifostr = ifo -.. else: -.. ifostr = "".join(ifo) -.. st_rnd, end_rnd = int(math.floor(seg[0])), int(math.ceil(seg[1])) -.. dur = end_rnd - st_rnd -.. return "%s-%s-%d-%d.%s" % (ifostr, tag, st_rnd, dur, ext) -.. #+END_SRC - diff --git a/docs/_sources/installation.rst.txt b/docs/_sources/installation.rst.txt new file mode 100644 index 0000000..cb5ed4e --- /dev/null +++ b/docs/_sources/installation.rst.txt @@ -0,0 +1,57 @@ +Installation +============ + +The program requires the following general packages to run: `Numpy `_, `Matplotlib `_, `Scipy `_ and `Astropy `_. The following LIGO-related packages are also required for full functionality: `Gwpy `_, `PyCBC `_, `Glue `_, `LAL `_, `LALburst `_ and `LALsimulation `_. + +While most of the packages can be installed automatically using `pip `_, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called `LALsuite `_ which can be installed fairly easily on Debian (Linux) and Mac OS machines. 
+ +LALsuite tools +-------------- + +Some useful pages on how to download and install the LIGO software can be found `here `_. + +MacPorts (Mac) +~~~~~~~~~~~~~~ + +For Mac users, the installation is pretty easy, detailed information can be found on `this page `_. You need to have `MacPorts `_ installed. The following commands should suffice to install the LALsuite package on your machine:: + + sudo port install lscsoft-deps + sudo port install glue + sudo port install lalapps + +The first command will install all the dependencies needed for the LIGO software to be installed. The following 2 commands will install the actual packages. + +apt-get (Debian) +~~~~~~~~~~~~~~~~ + +Since the LIGO software is not a default package in the apt package manager system on Debian machine, additional steps will be needed. The first step is to add the following links to the source list located at ``/etc/apt/sources.list``:: + + deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + +Note that the ``[arch=amd64]`` is needed to fix the architecture problem in case it tries to install i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:: + + apt-get install build-essential automake autoconf libtool devscripts + +The LIGO software can finally be installed using the following command:: + + apt-get install lscsoft-all + +Main Program +------------ + +The best way to install the GNOME software along with the rest of the dependencies is by using `pip`:: + + pip install gdas + +(You may need to put a ``sudo`` in front of this). For this to work +you need to have `pip +`_ installed. This +method allows for easy uninstallation. 
+ +You can also simply download the tarball from the PyPI website, unpack it and then do:: + + python setup.py install + +The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. +The development version can be downloaded from `here `_. diff --git a/docs/_sources/server.rst.txt b/docs/_sources/server.rst.txt new file mode 100644 index 0000000..b8b5172 --- /dev/null +++ b/docs/_sources/server.rst.txt @@ -0,0 +1,20 @@ +Multi-user Server +================= + +A GNOME JupyterHub, or multi-user server has been created to allow each member to access the entire available dataset. Member who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. Member who are not part of the GNOME collaboration will not be granted access to the dataset but are free to use our software on their own data. + +The server can be accessed in two ways, either by acceding the `server's webpage `_, or from your terminal through SSH:: + + ssh -X username@budker.uni-mainz.de -p 8022 + +While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after login and select Terminal: + +.. figure:: img/jupyter1.png + :width: 70% + :align: center + +You can then use the terminal window to access files and create new Python scripts for your analysis. + +.. figure:: img/jupyter2.png + :width: 70% + :align: center diff --git a/docs/backup.html b/docs/backup.html new file mode 100644 index 0000000..fa9f18c --- /dev/null +++ b/docs/backup.html @@ -0,0 +1,1145 @@ + + + + + + + + GNOME Data Analysis Software — gdas 0.2.9 documentation + + + + + + + + + + + + + + + +
+ + +
+

GNOME Data Analysis Software

+
    +
  • test2
  • +
+
+
+ +
+
+

Introduction

+

This package contains functions useful for magnetic field signal processing, with a focus on Excess Power search analysis and application on the data for the GNOME collaboration, see Pustelny et al. (2013). This documentation details all the functions and tasks available through this software. Here are some example tasks that can (or soon will) be handled:

+
    +
  • Plot usual time series and spectrogram of magnetic field data.
  • +
  • Perform excess power analysis and plot detected triggers in time-frequency map.
  • +
  • Create artificial data for testing data analysis.
  • +
  • Inject fake signal of different bandwidth and durations.
  • +
  • Cross-correlation of continuous sine wave signals.
  • +
  • Perform Allan Standard deviation.
  • +
+Fork me on GitHub
+
+

Installation

+

The program requires the following general packages to run: Numpy, Matplotlib, Scipy and Astropy. The following LIGO-related packages are also required for full functionality: Gwpy, PyCBC, Glue, LAL, LALburst and LALsimulation.

+

While most of the packages can be installed automatically using pip, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called LALsuite which can be installed fairly easily on Debian (Linux) and Mac OS machines.

+
+

LALsuite tools

+

Some useful pages on how to download and install the LIGO software can be found here.

+
+

MacPorts (Mac)

+

For Mac users, the installation is pretty easy, detailed information can be found on this page. You need to have MacPorts installed. The following commands should suffice to install the LALsuite package on your machine:

+
sudo port install lscsoft-deps
+sudo port install glue
+sudo port install lalapps
+
+
+

The first command will install all the dependencies needed for the LIGO software to be installed. The following 2 commands will install the actual packages.

+
+
+

apt-get (Debian)

+

Since the LIGO software is not a default package in the apt package manager system on Debian machine, additional steps will be needed. The first step is to add the following links to the source list located at /etc/apt/sources.list:

+
deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib
+deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib
+
+
+

Note that the [arch=amd64] is needed to fix the architecture problem in case it tries to install i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:

+
apt-get install build-essential automake autoconf libtool devscripts
+
+
+

The LIGO software can finally be installed using the following command:

+
apt-get install lscsoft-all
+
+
+
+
+
+

Main Program

+

The best way to install the GNOME software along with the rest of the dependencies is by using pip:

+
pip install gdas
+
+
+

(You may need to put a sudo in front of this). For this to work +you need to have pip installed. This +method allows for easy uninstallation.

+

You can also simply download the tarball from the PyPI website, unpack it and then do:

+
python setup.py install
+
+
+

The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. +The development version can be downloaded from here.

+
+
+
+

Multi-user Server

+

A GNOME JupyterHub, or multi-user server, has been created to allow each member to access the entire available dataset. Members who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. Users who are not part of the GNOME collaboration will not be granted access to the dataset but are free to use our software on their own data.

+

The server can be accessed in two ways, either by visiting the server’s webpage, or from your terminal through SSH:

+
ssh -X username@budker.uni-mainz.de -p 8022
+
+
+

While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after login and selecting Terminal:

+
+_images/jupyter1.png +
+

You can then use the terminal window to access files and create new Python scripts for your analysis.

+
+_images/jupyter2.png +
+
+
+

Working Example

+

Either on your own computer or on the server, in a Jupyter notebook or in a Python script, the first thing to do is to import the gdas package that contains all the modules present in the GNOME software. That can be done easily by doing the following:

+
import gdas
+
+
+

In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:

+
station    = 'fribourg01'
+start_time = '2016-11-03-04'
+end_time   = '2016-11-03-04-2'
+
+
+

where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified.

+

If you are not working on the server and the data are located in a different repository than /GNOMEDrive/gnome/serverdata/, a custom path can be defined. For instance:

+
datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/'
+
+
+

The magnetic field data can then be retrieved as follows:

+
ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath)
+
+
+

The gdas.magfield method will return 3 arrays of data that can then be used to produce different plots:

+
gdas.plot_activity(activity)
+gdas.plot_time_series(station,ts_list,seglist=activity)
+gdas.plot_asd(station,ts_list)
+gdas.plot_whitening(station,ts_list,activity)
+
+
+

This is a script to do Excess Power analysis:

+
psd_segment_length = 60
+psd_segment_stride = 30
+psd_estimation     = 'median-mean'
+window_fraction    = 0
+tile_fap           = 1e-5
+channels           = 250
+
+gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels)
+gdas.plot_triggers()
+
+
+
+
+

Data extraction

+
+

Extracting real data

+
+

Retrieve metadata

+

The first step is to define some variables related to which data we want to study and their location. The os.path.join method will join the different paths passed as arguments (i.e. in the parentheses):

+
# Set name of the channel to extract
+setname = "MagneticFields"
+# Define station name and map
+station = "fribourg01"
+# Define year, month and day
+year,month,day = '2016','11','03'
+# Define path to main data repository
+path1 = '/Users/vincent/ASTRO/data/GNOMEDrive/gnome/serverdata/'
+# Define path to day repository
+path2 = "%s/%s/%s/%s/"%(station,year,month,day)
+# Define generic hdf5 filenames
+path3 = "%s_%s%s%s_*.hdf5"%(station,year,month,day)
+# Define full generic path name
+fullpath = os.path.join(path1,path2,path3)
+
+
+

We then use the glob module to list all the files that satisfy the full path name and loop over each HDF5 file and do the following:

+
    +
  • Extract its metadata using the h5py package;
  • +
Calculate the time segment to which the data correspond using the file_to_segment function;
  • +
Store each filename and metadata in two different dictionary variables, file_order and data_order.
  • +
+

Finally, we extract the sampling rate from one of the files, which will be used later in the analysis. The sampling rate is the same for all the data files:

+
# Initialising dictionary for data
+file_order,data_order = {},{}
+# Loop over all existing data files
+for fname in glob.glob(fullpath):
+    # Read hdf5 file
+    hfile = h5py.File(fname, "r")
+    # Extract segment information from file
+    segfile = file_to_segment(hfile,setname)
+    # Associate file in dictionary with association to segment data
+    file_order[segfile] = fname
+    data_order[segfile] = hfile
+# Retrieve sampling rate from last read file
+sample_rate = hfile[setname].attrs["SamplingRate(Hz)"]
+
+
+
+
+

Creating segment lists

+

This section will create a continuous list of all the data segments available. We use the following modules in order to create the list properly:

+
    +
The segmentlist module from the glue.segments library defines the list of segments. The coalesce() method is then used to put all the segments in a coalesced state.
  • +
The DataQualityDict module from the gwpy.segments library makes it possible to store all the data segments in an ordered dictionary.
  • +
The DataQualityFlag module from the gwpy.segments library makes it possible to record times during which the instrument was operating outside of its nominal condition.
  • +
+

The script is as follows:

+
# Generate an ASCII representation of the GPS timestamped segments of time covered by the input data
+seglist = segmentlist(data_order.keys())
+# Sort the segment list
+seglist.sort()
+# Initialise dictionary for segment information
+full_seglist = DataQualityDict()
+# Save time span for each segment in ASCII file
+with open("segments.txt", "w") as fout:
+    for seg in seglist:
+        print >>fout, "%10.9f %10.9f" % seg
+# FIXME: Active should be masked from the sanity channel
+full_seglist[station] = DataQualityFlag(station,active=seglist.coalesce(),known=seglist.coalesce())
+# Define start and end time of entire dataset
+start, end = full_seglist[station].active.extent()
+
+
+
+
+

Establishing active times

+

Here’s the script:

+
# Generate an ASCII representation of the GPS timestamped segments of time covered by the input data
+seglist = segmentlist(data_order.keys())
+# Sort the segment list
+seglist.sort()
+# Import gwpy tools
+plot = SegmentPlot()
+# Initialize plotting figure
+ax = plot.gca()
+# Plot all segment in figure
+ax.plot(full_seglist)
+# Save figure
+pyplot.savefig("activity.png",dpi=500)
+
+
+
+
+

Retrieve and concatenate the data.

+

Here’s the script:

+
# Generate time series for the ensemble of data
+data_list = generate_timeseries(file_order,setname)
+# Retrieve channel data for all the segments
+full_data = numpy.hstack([retrieve_channel_data(data_order[seg],setname) for seg in seglist])
+# Define log base 2 of the total time length of the full data
+loglength = math.log(len(full_data)/sample_rate, 2)
+# Define zero padding
+zpad = math.ceil(loglength)
+zpad = int(2**zpad) - len(full_data)/sample_rate
+zpad = numpy.zeros(int(zpad*sample_rate / 2.0))
+# Include padding next to the data
+full_data = numpy.hstack((zpad, full_data, zpad))
+# Models a time series consisting of uniformly sampled scalar values
+ts_data = types.TimeSeries(full_data,delta_t=1/sample_rate,epoch=seglist[0][0])
+# Loop over all the elements in the dictionary
+for v in data_order.values():
+    # Close the element
+    v.close()
+
+
+
+
+
+

Producing fake data

+
+

Create simulated time series data

+

It is easy to create fake data: one can use the numpy.random.normal method from the Numpy library to draw random samples from a normal Gaussian distribution with mean of 0, standard deviation of 1, and a length equal to the sampling rate (args.sample_rate) times the length in seconds of individual segments (args.psd_segment_length) times the number of segments the user wishes to produce. After defining the starting UTC time, one can then create a time series of the data using the TimeSeries module from the gwpy.timeseries library:

+
print "Create fake data..."
+start = 1153742437.0
+end   = start + args.psd_segment_length * 16
+station = "gaussian-noise"
+setname = "MagneticFields"
+full_data = numpy.random.normal(0, 1, int(args.sample_rate * args.psd_segment_length * 16))
+ts_data = TimeSeries(full_data, sample_rate=args.sample_rate,epoch=start)
+
+
+
+
+

Produce and plot fake signal

+

Here’s the script:

+
delta_t = 1.0/args.sample_rate
+filter_band = 4
+#q = math.sqrt(2)*f_0/filter_band * 2
+#f_0 = 18
+duration = 0.1
+hrss = 0.0275
+#hp, hx = SimBurstSineGaussian(q * 2, f_0, hrss, 1, 0, data_dt)
+hp, hx = SimBurstGaussian(duration, hrss, delta_t)
+hp = TimeSeries.from_lal(hp)
+hx = TimeSeries.from_lal(hx)
+# We rescale the amplitude to hide or expose it in the data a bit better
+hp *= 100.
+
+pyplot.figure()
+pyplot.plot(hp.times, hp, 'k-')
+pyplot.xlim([-0.5, 0.5])
+pyplot.ylim([-0.1, 0.1]);
+pyplot.xlabel('Time (s)')
+pyplot.ylabel('Magnitude')
+pyplot.savefig('fakesignal.png')
+pyplot.close()
+
+
+
+
+

Inject fake signal into artificial data

+

Here’s the script:

+
random_time = int((start+end)/2.)
+st = (random_time-start)*args.sample_rate - len(hp)/2
+en = st + len(hp)
+hp.epoch = random_time
+ts_data[st:en] += hp
+data_list = [ts_data]
+ts_data = types.TimeSeries(ts_data.value,delta_t=1.0/args.sample_rate,epoch=start)
+
+
+
+
+
+
+

Plotting Data

+
+

Generate a plot of the data time series

+

Here’s the script:

+
# Include time series element in dictionary
+plot = TimeSeriesPlot()
+# Create axis in plot
+ax = plot.gca()
+# Loop over all the time series
+for ts in data_list:
+    # Plot time series for each segment
+    ax.plot(ts, color='blue')
+# Display title
+ax.set_title(station)
+# Plot activity segments
+plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor': 'g','edgecolor': 'k'})
+# Define edges of the x axis
+ax.set_xlim(start, end)
+# Save figure
+plot.savefig('time_series.png',dpi=500)
+
+
+
+
+

Create sound based on the data

+

Here’s the script:

+
wout = wave.open("pure_tone.wav", "w")
+wout.setnchannels(1) # mono
+wout.setsampwidth(4) # 32 bit audio
+wout.setframerate(1000)
+wout.writeframes(ts[:])
+wout.close()
+
+
+
+
+

Invoking precision issues

+

AGG complexity starts to complain with large numbers of points and we somehow invoke precision issues that need to be ameliorated:

+
for d in data_list:
+    d.x0 = Quantity(int(d.x0.value * 500), d.xunit)
+    d.dx = Quantity(1, d.xunit)
+data_list.coalesce()
+for d in data_list:
+    d.x0 = Quantity(d.x0.value / 500, d.xunit)
+    d.dx = Quantity(0.002, d.xunit)
+
+
+
+
+

Amplitude Spectral Density (ASD)

+

Here’s the script:

+
# Initialize plotting functionality
+plot = SpectrumPlot()
+# Loop over all the time series
+for d in data_list:
+    # Generate 8 seconds per FFT with 4 second (50%) overlap
+    spectrum = d.asd(8, 4)
+    # Create plotting axis
+    ax = plot.gca()
+    # Plot square root of the spectrum
+    ax.plot(numpy.sqrt(spectrum))
+# Set x axis to log scale
+ax.set_xscale('log')
+# Set y axis to log scale
+ax.set_yscale('log')
+# Set x axis limits
+ax.set_xlim(1e-1, 500)
+# Save figure
+plot.savefig("asd.png",dpi=500)
+
+
+
+
+

(Un)normalized Spectrograms

+

The first thing to do is to initialise the plotting axis for both figure as well as some display settings specific to spectrogram and which can be loaded using the SpectrogramPlot() module from the gwpy.plotter library:

+
plot = SpectrogramPlot()
+ax = plot.gca()
+white_plot = SpectrogramPlot()
+wax = white_plot.gca()
+
+
+

The spectrogram is then created using the spectrogram function from the gwpy.timeseries.TimeSeries package. This will calculate the average power spectrogram of this TimeSeries using the specified average spectrum method (default being the Welch’s method). We define the 3 following variables that will be used to construct the spectrogram:

+
    +
  • stride: number of seconds in single PSD (column of spectrogram), default 20;
  • +
  • fftlength: number of seconds in single FFT, default 6;
  • +
  • overlap: number of seconds between FFTs, default 3.
  • +
+

We can then loop over all the time series made from each loaded HDF5 data file, and construct the spectrogram for each time series. The whitening of the spectrogram is then done by normalising it, which can be performed using the ratio method from the gwpy.spectrogram.Spectrogram library. This will calculate the ratio of the created spectrogram against a specific reference; here we choose the reference to be the median of each spectrum in the given spectrogram:

+
+\[\sqrt{S(f,t)}/\sqrt{\overline{S(f)}}\]
+

The script is as follows:

+
for ts in data_list:
+    if (len(ts) * ts.dt).value < stride:
+        continue
+    spec = ts.spectrogram(stride, fftlength=fftlength, overlap=overlap)
+    ax.plot(spec)
+    wspec = spec.ratio('median')
+    wax.plot(wspec, vmin=0.1, vmax=100)
+
+
+

Finally, the plot can be completed by including the activity period below each figure:

+
ax.set_title(station)
+ax.set_xlim(seglist[0][0], seglist[-1][1])
+ax.set_ylim(1e-1, 500)
+ax.set_yscale('log')
+plot.add_colorbar(log=True)
+plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'})
+plot.savefig("spectrogram.png",dpi=500)
+
+wax.set_title(station)
+wax.set_xlim(seglist[0][0], seglist[-1][1])
+wax.set_ylim(1e-1, 500)
+wax.set_yscale('log')
+white_plot.add_colorbar(log=True)
+white_plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'})
+white_plot.savefig("whitened_spectrogram.png",dpi=500)
+
+
+
+
+
+

Excess-Power algorithm

+
+

General overview

+

The Excess Power method is known as the optimal detection strategy to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search for Axion-Like Particles (ALP). This method was developed and introduced by Anderson et al. (2001) and has been extensively used in the detection of burst sources of gravitational radiation. A more technical document was written by Brady et al. (2007) describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code.

+

We present below a step-by-step procedure followed during the Excess Power search analysis. For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane.

+
    +
  • Time domain segmentation and PSD estimate

    +
    +

    We first estimate the instrument’s noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by \(\Delta f\) equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process.

    +
    +
  • +
  • Comb of frequency channels

    +
    +

    We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a \(\Delta f\) determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter’s frequency series.

    +
    +
  • +
  • Creating analysing blocks

    +
    +

    The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for the PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel.

    +
    +
  • +
  • Creating tiles with different bandwidth

    +
    +

    We can now construct tiles with different bandwidth by summing multiple channels together.

    +
    +
  • +
  • Exploring tiles with different duration

    +
    +

    For each given tile’s bandwidth, one can investigate different tile’s duration. This can be done by exploring different number of degrees of freedom, \(d\), which can be calculated as follows: \(d=2BT\) where \(B\) and \(T\) are respectively the bandwidth and duration of the tile. Section 2.2.5 of Brady et al. gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the \(d\), one can explore multiple tile’s duration for different bandwidth.

    +
    +
  • +
  • Define triggering signal

    +
    +

    The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile’s energy in the time-frequency map. A tile energy time-frequency map plot similar to Figure 5 in Pustelny et al. (2013) can then be made which plots the outlying tile energies present in the data.

    +
    +
  • +
+
+_images/overview.png +

Overview of the Excess Power method and difference between segments, channels, tiles and blocks.

+
+
+
+

Estimate Power Spectral Density (PSD)

+

The instrument’s noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values.

+
# Convert time series as array of float
+data = ts_data.astype(numpy.float64)
+
+
+

The PSD is calculated by splitting up the signal into overlapping segments and scan through each segment to calculate individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, avg_method, that will be used to measure the PSD from the data. This can be specified with the --psd-estimation option.

+
# Average method to measure PSD from the data
+avg_method = args.psd_estimation
+
+
+

One also needs to specify the length of each segment, seg_len, as well as the separation between 2 consecutive segments, seg_stride. Both parameters can be defined in second units with the --psd-segment-length and --psd-segment-stride arguments respectively and can then be converted into sample unit.

+
# The segment length for PSD estimation in samples
+seg_len = int(args.psd_segment_length * args.sample_rate)
+# The separation between consecutive segments in samples
+seg_stride = int(args.psd_segment_stride * args.sample_rate)
+
+
+

We then use Welch’s method to perform the power spectral density estimate using the welch module from the pycbc.psd library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, avg_method, returning the frequency series, fd_psd, which will store the power measurement for each frequency bin.

+
# Lifted from the psd.from_cli module
+fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride)
+# Plot the power spectral density
+plot_spectrum(fd_psd)
+# We need this for the SWIG functions
+lal_psd = fd_psd.lal()
+
+
+

One can display the power measurements, frequency array and frequency between consecutive samples, \(\Delta f\) in Hertz, by printing the following variables:

+
print 'Display power measurements of the first 10 frequency bins'
+print fd_psd[:10]
+print 'Display central frequency of the first 10 bins'
+print fd_psd.sample_frequencies[:10]
+print 'Display the frequency separation between bins'
+print fd_psd.delta_f
+
+
+

\(\Delta f\) corresponds to the inverse of a segment’s length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate.

+
+
+

Checking filtering settings

+

The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the strain_high_pass argument). If the high-pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated.

+
if args.min_frequency < args.strain_high_pass:
+    print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency)
+
+
+

In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as a signal at a higher frequency cannot be easily identified.

+
if args.max_frequency is None:
+    args.max_frequency = args.sample_rate / 2.0
+
+
+

If either the bandwidth of the finest filter (--tile-bandwidth argument, see section construct_args) or the number of frequency channels (--channels argument) is not defined but the total spectral band is (data_band), one can then determine all the filter settings as follows:

+
if args.tile_bandwidth is None and args.channels is None:
+    # Exit program with error message
+    exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane")
+else:
+    # Define as assert statement that tile maximum frequency larger than its minimum frequency
+    assert args.max_frequency >= args.min_frequency
+    # Define spectral band of data
+    data_band = args.max_frequency - args.min_frequency
+    # Check if tile bandwidth or channel is defined
+    if args.tile_bandwidth is not None:
+        # Define number of possible filter bands
+        nchans = args.channels = int(data_band / args.tile_bandwidth)  - 1
+    elif args.channels is not None:
+        # Define filter bandwidth
+        band = args.tile_bandwidth = data_band / (args.channels + 1)
+    assert args.channels > 1
+
+
+

The minimum frequency to be explored can be user-defined by using the --min-frequency option.

+
# Lowest frequency of the first filter
+flow = args.min_frequency
+
+
+
+
+

Whitening window and spectral correlation

+

This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose window_fraction * args.psd_segment_length to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with args.psd_segment_length equal to 8; smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end).

+
window_fraction = 0
+
+
+

The two point spectral correlation is then done with the calculate_spectral_correlation function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window.

+
# Do two point spectral correlation
+window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction)
+window = window.data.data
+window_sigma_sq = numpy.mean(window**2)
+# Pre scale the window by its root mean squared -- see eqn 11 of EP document
+#window /= numpy.sqrt(window_sigma_sq)
+
+
+
+
+

Computing the filter bank

+

The filter bank will create band-pass filters for each channel in the PSD frequency domain. The create_filter_bank function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in increments equal to the frequency band. The filter’s total extent in Fourier space is actually twice the stated bandwidth (FWHM).

+
# Define filters
+filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr)
+
+
+

This function will return 2 arrays: the filter_bank array which is a list of COMPLEX16FrequencySeries arrays corresponding to each channel’s filter, and the fdb array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. nchans). The filter’s data, \(\Delta f\) value, and first and last frequencies of any channel’s filter can be displayed as follows:

+
# Print data of first channel's filter
+print filter_bank[0].data.data
+# Print frequency separation between 2 values in the first channel's filter
+print filter_bank[0].deltaF
+# Print first frequency of the first channel's filter
+print filter_bank[0].f0
+# Print last frequency of the first channel's filter (equal to twice the channel's bandwidth)
+print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF
+
+
+

Further in the analysis, the following filters will be used: 1. white_filter_ip: Whitened filter inner products computed with themselves. 2. unwhite_filter_ip: Unwhitened filter inner products computed with themselves. 3. white_ss_ip: Whitened filter inner products computed between input adjacent filters. 4. unwhite_ss_ip: Unwhitened filter inner products computed between input adjacent filters.

+
# This is necessary to compute the mu^2 normalizations
+white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None)
+unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd)
+# These two are needed for the unwhitened mean square sum (hrss)
+white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None)
+unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd)
+
+
+
+
+

Normalization of virtual channel

+

The virtual channels will be used during the excesspower analysis to explore different frequency ranges around each PSD segments and look for possible triggers. Each channel is renormalized using the compute_channel_renomalization internal function.

+
# Initialise dictionary
+mu_sq_dict = {}
+# nc_sum additional channel adds
+for nc_sum in range(0, int(math.log(nchans, 2))):
+    min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2
+    print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band)
+    nc_sum = 2**nc_sum - 1
+    mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans)
+
+
+
+
+

Initialise event list and determine stride boundaries

+

First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as lsctables. A pre-defined LSC table can be constructed using the New function from the glue.ligolw.lsctables module. We use the SnglBurstTable function for the type of data to be stored and define all the columns we wish to record.

+
# Create event list for single burst table
+event_list = lsctables.New(lsctables.SnglBurstTable,
+                           ['start_time','start_time_ns','peak_time','peak_time_ns',
+                            'duration','bandwidth','central_freq','chisq_dof',
+                            'confidence','snr','amplitude','channel','ifo',
+                            'process_id','event_id','search','stop_time','stop_time_ns'])
+
+
+

We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively t_idx_min and t_idx_max. The default values are considered to be 0 for the starting index and the segment length in samples for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in samples is determined and added to both starting and ending time indexes.

+
# Determine boundaries of stride in time domain
+t_idx_min, t_idx_max = 0, seg_len
+# Check if user requested starting time is defined
+if args.analysis_start_time is not None:
+    # Define the time difference in seconds between data and user requested starting times
+    t_idx_off = args.analysis_start_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_off = int(t_idx_off * args.sample_rate)
+else:
+    # Define index of the starting point as first value in data
+    t_idx_off = 0
+# Initialise minimum index values as offset starting index
+t_idx_min += t_idx_off
+# Initialise maximum index values as offset starting index
+t_idx_max += t_idx_off
+
+
+

Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ts_data.

+
# Check if user requested end time is defined
+if args.analysis_end_time is not None:
+    # Define the time difference between data and user requested ending times
+    t_idx_max_off = args.analysis_end_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_max_off = int(t_idx_max_off * args.sample_rate)
+else:
+    # Define index of the ending point as the length of data array
+    t_idx_max_off = len(ts_data)
+
+
+
+
+

Define analysing blocks

+

The first thing we do is to calculate the time series for the segment that is covered (tmp_ts_data) and redefined the metadata, especially the time of the first sample in seconds which is defined by the epoch argument and is different for every segment. After plotting the time series for that segment, the data are then converted into frequency series (fs_data) using the to_frequencyseries module from the pycbc.types.timeseries.TimeSeries library. Finally, the frequency data are then whitened.

+
# Loop over each data within the user requested time period
+while t_idx_max <= t_idx_max_off:
+    # Define starting and ending time of the segment in seconds
+    start_time = ts_data.start_time + t_idx_min/float(args.sample_rate)
+    end_time = ts_data.start_time + t_idx_max/float(args.sample_rate)
+    print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off))
+    # Model a withen time series for the block
+    tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time)
+    # Save time series in segment repository
+    segfolder = 'segments/%i-%i'%(start_time,end_time)
+    os.system('mkdir -p '+segfolder)
+    plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder))
+    # Convert times series to frequency series
+    fs_data = tmp_ts_data.to_frequencyseries()
+    print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2
+    # Whitening (FIXME: Whiten the filters, not the data)
+    fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f)
+    print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2
+
+
+
+
+

Create time-frequency map for each block

+

We initialise a 2D zero array for a time-frequency map (tf_map) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the nchans variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the seg_len variable.

+
# Initialise 2D zero array for time-frequency map
+tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128)
+
+
+

We also initialise a zero vector for a temporary filter bank (tmp_filter_bank) that will store, for a given channel, the filter’s values from the original filter bank (filter_bank) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (fd_psd).

+
# Initialise 1D zero array
+tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128)
+
+
+

We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel’s filter from the original filter bank.

+
# Loop over all the channels
+print tprint(t0,t1),"Filtering all %d channels..." % nchans
+for i in range(nchans):
+    # Reset filter bank series
+    tmp_filter_bank *= 0.0
+    # Index of starting frequency
+    f1 = int(filter_bank[i].f0/fd_psd.delta_f)
+    # Index of ending frequency
+    f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1
+    # (FIXME: Why is there a factor of 2 here?)
+    tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2
+
+
+

We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel.

+
# Define the template to filter the frequency series with
+template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False)
+
+
+

Finally, we use the matched_filter_core module from the pycbc.filter.matchedfilter library to filter the frequency series from the channel. This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector.

+
# Create filtered series
+filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None,
+                                             low_frequency_cutoff=filter_bank[i].f0,
+                                             high_frequency_cutoff=filter_bank[i].f0+2*band)
+
+
+

The matched filter is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed.

+
# Include filtered series in the map
+tf_map[i,:] = filtered_series[0].numpy()
+
+
+

The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of samples present in one segment of data, i.e. the segment’s length in seconds times the sampling rate. The map can finally be plotted with a \(\Delta t\) corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and \(\Delta f\) is equal to the bandwidth of one channel.

+
plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder))
+
+
+
+
+

Constructing tiles of different bandwidth

+

First and foremost, we define a clipping region in the data to be used to remove window corruption, this is non-zero if the window_fraction variable is set to a non-zero value.

+
print tprint(t0,t1),"Beginning tile construction..."
+# Clip the boundaries to remove window corruption
+clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2)
+
+
+

In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. We first need to setup a loop such that the maximum number of additional channel is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (nc_sum) would therefore be equal to 2 to the power of the current quantity of additional channels.

+
for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds
+    nc_sum = 2**nc_sum - 1
+    print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1)
+
+
+

The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to band * (nc_sum + 1).

+
us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t)))
+print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate)
+
+
+

“Virtual” wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time-frequency map for this tile using the make_indp_tiles internal function. In other words, we are constructing multiple sub-tiles for which we can determine the respective energy in the given frequency band.

+
mu_sq = mu_sq_dict[nc_sum]
+sys.stderr.write("\t...calculating tiles...")
+if clip_samples > 0:
+    tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq)
+else:
+    tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq)
+sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape)
+print >>sys.stderr, " done"
+print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles))
+
+
+
+
+

Explore multiple tile durations

+

Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the --max-duration argument. If not, the value will be set to 32.

+
if args.max_duration is not None:
+    max_dof = 2 * args.max_duration * (band * (nc_sum+1))
+else:
+    max_dof = 32
+assert max_dof >= 2
+
+
+

Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth.

+
print "\t\t...getting longer durations..."
+#for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]:
+for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]:
+    sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j))
+    #tlen = tiles.shape[1] - j + 1
+    tlen = tiles.shape[1] - 2*j + 1 + 1
+    if tlen <= 0:
+        print >>sys.stderr, " ...not enough samples."
+        continue
+    dof_tiles = numpy.zeros((tiles.shape[0], tlen))
+    #:sum_filter = numpy.ones(j)
+    # FIXME: This is the correct filter for 50% overlap
+    sum_filter = numpy.array([1,0] * (j-1) + [1])
+    #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1])
+    for f in range(tiles.shape[0]):
+        # Sum and drop correlate tiles
+        # FIXME: don't drop correlated tiles
+        #output = numpy.convolve(tiles[f,:], sum_filter, 'valid')
+        dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid')
+    print >>sys.stderr, " done"
+    print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles))
+    level_tdiff = time.time() - tdiff
+    print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff
+
+
+

Finally, the bandwidth and duration of the tile can be defined as follows:

+
# Current bandwidth of the time-frequency map tiles
+current_band = band * (nc_sum + 1)
+# How much each "step" is in the frequency domain -- almost
+# assuredly the fundamental bandwidth
+df = current_band
+# How much each "step" is in the time domain -- under sampling rate
+# FIXME: THis won't work if the sample rate isn't a power of 2
+dt = 1.0 / 2 / (2 * current_band) * 2
+full_band = 250
+dt = current_band / full_band * ts_data.sample_rate
+dt = 1.0/dt
+# Duration is fixed by the NDOF and bandwidth
+duration = j / 2.0 / current_band
+
+
+
+
+

Trigger finding

+

In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such a threshold can be determined by using the inverse survival function method from the scipy.stats.chi2 package.

+
threshold = scipy.stats.chi2.isf(args.tile_fap, j)
+print "Threshold for this level: %f" % threshold
+#if numpy.any(dof_tiles > threshold):
+    #plot_spectrogram(dof_tiles.T)
+    #import pdb; pdb.set_trace()
+
+
+

Once the threshold is set, one can then run the trigger_list_from_map function to quickly find the trigger signals in the dof_tiles array that exceed this threshold.

+
# Since we clip the data, the start time needs to be adjusted accordingly
+window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2
+trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None)
+for event in event_list[::-1]:
+    if event.amplitude != None:
+        continue
+    etime_min_idx = float(event.get_start()) - float(fs_data.epoch)
+    etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t)
+    etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration
+    etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t)
+    # (band / 2) to account for sin^2 wings from finest filters
+    flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band)
+    fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band)
+    # TODO: Check that the undersampling rate is always commensurate
+    # with the indexing: that is to say that
+    # mod(etime_min_idx, us_rate) == 0 always
+    z_j_b = tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate]
+    # FIXME: Deal with negative hrss^2 -- e.g. remove the event
+    try:
+        event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof)
+    except ValueError:
+        event.amplitude = 0
+
+print "Total number of events: %d" % len(event_list)
+
+
+
+
+

Switch to new block

+

The following will move the time window to the next block:

+
tdiff = time.time() - tdiff
+print "Done with this block: total %f" % tdiff
+
+t_idx_min += int(seg_len * (1 - window_fraction))
+t_idx_max += int(seg_len * (1 - window_fraction))
+
+
+
+
+

Extracting GPS time range

+

We use the LIGOTimeGPS structure from the glue.lal package to store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference. Once both times are defined, the range of values is stored in a semi-open interval using the segment module from the glue.segments package.

+
# Starting epoch relative to GPS starting epoch
+start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time)
+# Ending epoch relative to GPS ending epoch
+end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time)
+# Represent the range of values in the semi-open interval
+inseg = segment(start_time,end_time)
+
+
+
+
+

Prepare output file for given time range

+
xmldoc = ligolw.Document()
+xmldoc.appendChild(ligolw.LIGO_LW())
+
+ifo = args.channel_name.split(":")[0]
+proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date)
+
+# Figure out the data we actually analyzed
+outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction)
+
+ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg)
+
+for sb in event_list:
+    sb.process_id = proc_row.process_id
+    sb.search = proc_row.program
+    #sb.ifo, sb.channel = args.channel_name.split(":")
+    sb.ifo, sb.channel = station, setname
+
+xmldoc.childNodes[0].appendChild(event_list)
+fname = make_filename(station, inseg)
+
+utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True)
+
+
+
+
+

Plot trigger results

+
events = SnglBurstTable.read(fname+'.gz')
+#del events[10000:]
+plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr")
+#plot = events.plot('time', 'central_freq', color='snr')
+#plot.set_yscale("log")
+plot.set_ylim(1e-0, 250)
+t0 = 1153742417
+plot.set_xlim(t0 + 0*60, t0 + 1*60)
+#plot.set_xlim(t0 + 28, t0 + 32)
+pyplot.axvline(t0 + 30, color='r')
+cb = plot.add_colorbar(cmap='viridis')
+plot.savefig("triggers.png")
+
+
+
+
+
+

Module Access

+
+

Extract Magnetic Field Data

+

Extract magnetic field data from HDF5 files.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + +
magfield(station, starttime, endtime[, ...])Glob all files within user-defined period and extract data.
file_to_segment(hfile, segname)Define length of data segment.
construct_utc_from_metadata(datestr, t0str)
generate_timeseries(data_list[, setname])Generate time series using list of HDF5 data file paths
create_activity_list(station, data_order)Create consecutive list of available data segment.
retrieve_data_timeseries(hfile, setname)Retrieve data time series from HDF5 data file
retrieve_channel_data(hfile, setname)Retrieve the data from specific channel
+
+
+

Plotting routines

+

Methods to produce time-frequency plots and others

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
plot_activity(full_seglist)Plot full activity period for station.
plot_time_series(station, ts_list[, seglist, hp])Generate a plot of the whole data time series
plot_asd(station, ts_list)Plot Amplitude Spectral Density.
plot_whitening(station, ts_list[, seglist])Generate a spectrogram plot and normalized spectrogram
plot_ts(ts[, fname])
plot_spectrum(fd_psd)
plot_spectrogram(spec, dt, df, sample_rate, ...)
plot_spectrogram_from_ts(ts)
plot_triggers()
+
+
+

Excess Power Search Analysis

+

Main class to do excess-power search analysis

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
excess_power(ts_data, psd_segment_length, ...)Perform excess-power search analysis on magnetic field data.
check_filtering_settings(sample_rate, ...)Check filtering settings and define the total number of channels and bandwidth to use for filter bank.
calculate_psd(ts_data, sample_rate, ...)Estimate Power Spectral Density (PSD)
calculate_spectral_correlation(fft_window_len)Calculate the two point spectral correlation introduced by windowing the data before transforming to the frequency domain – valid choices are ‘hann’ and ‘tukey’.
create_filter_bank(delta_f, flow, band, ...)Create filter bank
convert_to_time_domain(fdb, sample_rate)Convert filter bank from frequency to time domain
identify_block(ts_data, fd_psd, window, ...)Get frequency series of the current block
create_tf_plane(fd_psd, nchans, seg_len, ...)Create time-frequency map
compute_filter_ips_self(lal_filters, spec_corr)Compute a set of inner products of input filters with themselves.
compute_filter_ips_adjacent(lal_filters, ...)Compute a set of filter inner products between input adjacent filters.
compute_channel_renormalization(filter_bank, ...)Compute the renormalization for the base filters up to a given bandwidth.
measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, ...)Approximation of unwhitened sum of squares signal energy in a given EP tile.
measure_hrss_slowly(z_j_b, lal_filters, ...)Approximation of unwhitened sum of squares signal energy in a given EP tile.
measure_hrss_poorly(tile_energy, sub_psd)
trigger_list_from_map(tfmap, event_list, ...)
determine_output_segment(inseg, dt_stride, ...)Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window.
make_tiles(tf_map, nc_sum, mu_sq)
make_indp_tiles(tf_map, nc_sum, mu_sq)Create a time frequency map with resolution of tf_map binning divided by nc_sum + 1.
make_filename(ifo, seg[, tag, ext])
construct_tiles(nc_sum, mu_sq, band, ...)Constructing tile and calculate their energy
create_tile_duration(j, df, duration, tiles)
create_xml(ts_data, psd_segment_length, ...)
+
+
+

Utilities

+

Independent routines to do various other things

+ ++++ + + + + + +
create_sound(ts)Create sound based on the data
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_overview.html b/docs/epower_overview.html new file mode 100644 index 0000000..c81fdd9 --- /dev/null +++ b/docs/epower_overview.html @@ -0,0 +1,127 @@ + + + + + + + + Excess Power - Overview — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Overview

+

The Excess Power method is known as the optimal detection strategy to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search for Axion-Like Particles (ALP). This method was developed and introduced by Anderson et al. (2001) and has been extensively used in the detection of burst sources of gravitational radiation. More technical documentation was written by Brady et al. (2007) describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code.

+

We present below a step-by-step procedure followed during the Excess Power search analysis. For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane.

+
    +
  • Time domain segmentation and PSD estimate

    +
    +

    We first estimate the instrument’s noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by \(\Delta f\) equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process.

    +
    +
  • +
  • Comb of frequency channels

    +
    +

    We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a \(\Delta f\) determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter’s frequency series.

    +
    +
  • +
  • Creating analysing blocks

    +
    +

    The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel.

    +
    +
  • +
  • Creating tiles with different bandwidth

    +
    +

    We can now construct tiles with different bandwidth by summing multiple channels together.

    +
    +
  • +
  • Exploring tiles with different duration

    +
    +

    For each given tile’s bandwidth, one can investigate different tile’s duration. This can be done by exploring different number of degrees of freedom, \(d\), which can be calculated as follows: \(d=2BT\) where \(B\) and \(T\) are respectively the bandwidth and duration of the tile. Section 2.2.5 of Brady et al. gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the \(d\), one can explore multiple tile’s duration for different bandwidth.

    +
    +
  • +
  • Define triggering signal

    +
    +

    The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile’s energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in Pustelny et al. (2013) can then be made which plots the outlying tile energies present in the data.

    +
    +
  • +
+
+_images/overview.png +

Overview of the Excess Power method and difference between segments, channels, tiles and blocks.

+
+
+

Code access

+ ++++ + + + + + +
excess_power(ts_data, psd_segment_length, ...)Perform excess-power search analysis on magnetic field data.
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step1_checkfilt.html b/docs/epower_step1_checkfilt.html new file mode 100644 index 0000000..b458605 --- /dev/null +++ b/docs/epower_step1_checkfilt.html @@ -0,0 +1,123 @@ + + + + + + + + Excess Power - Step 1: Checking filtering settings — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Step 1: Checking filtering settings

+

The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only let through frequencies that are higher than the cutoff frequency (here defined by the strain_high_pass argument). If the high-pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated.

+
if args.min_frequency < args.strain_high_pass:
+    print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency)
+
+
+

In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as signals at higher frequencies cannot be reliably identified in the sampled data.

+
if args.max_frequency is None:
+    args.max_frequency = args.sample_rate / 2.0
+
+
+

If the bandwidth of the finest filter (--tile-bandwidth argument, see section construct_args) or the number of frequency channels (--channels argument) is not defined but the total spectral band is (data_band), one can then determine all the filter settings as follows:

+
if args.tile_bandwidth is None and args.channels is None:
+    # Exit program with error message
+    exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane")
+else:
+    # Define as assert statement that tile maximum frequency larger than its minimum frequency
+    assert args.max_frequency >= args.min_frequency
+    # Define spectral band of data
+    data_band = args.max_frequency - args.min_frequency
+    # Check if tile bandwidth or channel is defined
+    if args.tile_bandwidth is not None:
+        # Define number of possible filter bands
+        nchans = args.channels = int(data_band / args.tile_bandwidth)  - 1
+    elif args.channels is not None:
+        # Define filter bandwidth
+        band = args.tile_bandwidth = data_band / (args.channels + 1)
+    assert args.channels > 1
+
+
+

The minimum frequency to be explored can be user-defined by using the --min-frequency option.

+
# Lowest frequency of the first filter
+flow = args.min_frequency
+
+
+
+

Code access

+ ++++ + + + + + +
check_filtering_settings(sample_rate, ...)Check filtering settings and define the total number of channels and bandwidth to use for filter bank.
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step2_psd.html b/docs/epower_step2_psd.html new file mode 100644 index 0000000..7cfa792 --- /dev/null +++ b/docs/epower_step2_psd.html @@ -0,0 +1,125 @@ + + + + + + + + Excess Power - Step 2: Estimate Power Spectral Density (PSD) — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Step 2: Estimate Power Spectral Density (PSD)

+

The instrument’s noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values.

+
# Convert time series as array of float
+data = ts_data.astype(numpy.float64)
+
+
+

The PSD is calculated by splitting up the signal into overlapping segments and scanning through each segment to calculate an individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, avg_method, that will be used to measure the PSD from the data. This can be specified with the --psd-estimation option.

+
# Average method to measure PSD from the data
+avg_method = args.psd_estimation
+
+
+

One also needs to specify the length of each segment, seg_len, as well as the separation between 2 consecutive segments, seg_stride. Both parameters can be defined in seconds with the --psd-segment-length and --psd-segment-stride arguments respectively and can then be converted into numbers of samples.

+
# The segment length for PSD estimation in samples
+seg_len = int(args.psd_segment_length * args.sample_rate)
+# The separation between consecutive segments in samples
+seg_stride = int(args.psd_segment_stride * args.sample_rate)
+
+
+

We then use Welch’s method to perform the power spectral density estimate using the welch function from the pycbc.psd library. What this will do is compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, avg_method, returning the frequency series, fd_psd, which will store the power measurement for each frequency bin.

+
# Lifted from the psd.from_cli module
+fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride)
+# Plot the power spectral density
+plot_spectrum(fd_psd)
+# We need this for the SWIG functions
+lal_psd = fd_psd.lal()
+
+
+

One can display the power measurements, frequency array and frequency between consecutive samples, \(\Delta f\) in Hertz, by printing the following variables:

+
print 'Display power measurements of the first 10 frequency bins'
+print fd_psd[:10]
+print 'Display central frequency of the first 10 bins'
+print fd_psd.sample_frequencies[:10]
+print 'Display the frequency separation between bins'
+print fd_psd.delta_f
+
+
+

\(\Delta f\) corresponds to the inverse of a segment’s length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half the sampling rate.

+
+

Code access

+ ++++ + + + + + +
calculate_psd(ts_data, sample_rate, ...)Estimate Power Spectral Density (PSD)
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step3_speccor.html b/docs/epower_step3_speccor.html new file mode 100644 index 0000000..dba9f21 --- /dev/null +++ b/docs/epower_step3_speccor.html @@ -0,0 +1,102 @@ + + + + + + + + Excess Power - Step 3: Two point spectral correlation — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Step 3: Two point spectral correlation

+

This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose window_fraction * args.psd_segment_length to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with args.psd_segment_length equal to 8; smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end).

+
window_fraction = 0
+
+
+

The two point spectral correlation is then done with the calculate_spectral_correlation function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window.

+
# Do two point spectral correlation
+window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction)
+window = window.data.data
+window_sigma_sq = numpy.mean(window**2)
+# Pre scale the window by its root mean squared -- see eqn 11 of EP document
+#window /= numpy.sqrt(window_sigma_sq)
+
+
+
+

Code access

+ ++++ + + + + + +
calculate_spectral_correlation(fft_window_len)Calculate the two point spectral correlation introduced by windowing the data before transforming to the frequency domain – valid choices are ‘hann’ and ‘tukey’.
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step4_filterbank.html b/docs/epower_step4_filterbank.html new file mode 100644 index 0000000..0dc6c86 --- /dev/null +++ b/docs/epower_step4_filterbank.html @@ -0,0 +1,118 @@ + + + + + + + + Excess Power - Step 4: Computing the filter bank — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Step 4: Computing the filter bank

+

The filter bank will create band-pass filters for each channel in the PSD frequency domain. The create_filter_bank function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in increments equal to the frequency band. The filter’s total extent in Fourier space is actually twice the stated bandwidth (FWHM).

+
# Define filters
+filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr)
+
+
+

This function will return 2 arrays: the filter_bank array which is a list of COMPLEX16FrequencySeries arrays corresponding to each channel’s filter, and the fdb array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. nchans). The filter’s data, \(\Delta f\) value, and first and last frequencies of any channel’s filter can be displayed as follows:

+
# Print data of first channel's filter
+print filter_bank[0].data.data
+# Print frequency separation between 2 values in the first channel's filter
+print filter_bank[0].deltaF
+# Print first frequency of the first channel's filter
+print filter_bank[0].f0
+# Print last frequency of the first channel's filter (equal to twice the channel's bandwidth)
+print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF
+
+
+

Further in the analysis, the following filters will be used: +1. white_filter_ip: Whitened filter inner products computed with themselves. +2. unwhite_filter_ip: Unwhitened filter inner products computed with themselves. +3. white_ss_ip: Whitened filter inner products computed between input adjacent filters. +4. unwhite_ss_ip: Unwhitened filter inner products computed between input adjacent filters.

+
# This is necessary to compute the mu^2 normalizations
+white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None)
+unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd)
+# These two are needed for the unwhitened mean square sum (hrss)
+white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None)
+unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd)
+
+
+
+

Code access

+ ++++ + + + + + +
create_filter_bank(delta_f, flow, band, ...)Create filter bank
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step5_normalization.html b/docs/epower_step5_normalization.html new file mode 100644 index 0000000..fb5e75a --- /dev/null +++ b/docs/epower_step5_normalization.html @@ -0,0 +1,100 @@ + + + + + + + + Excess Power - Step 5: Normalization of virtual channel — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Excess Power - Step 5: Normalization of virtual channel

+

The virtual channels will be used during the excess power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the compute_channel_renormalization internal function.

+
# Initialise dictionary
+mu_sq_dict = {}
+# nc_sum additional channel adds
+for nc_sum in range(0, int(math.log(nchans, 2))):
+    min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2
+    print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band)
+    nc_sum = 2**nc_sum - 1
+    mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans)
+
+
+
+

Code access

+ ++++ + + + + + +
compute_channel_renormalization(filter_bank, ...)Compute the renormalization for the base filters up to a given bandwidth.
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/epower_step6_initialisation.html b/docs/epower_step6_initialisation.html new file mode 100644 index 0000000..463cbb6 --- /dev/null +++ b/docs/epower_step6_initialisation.html @@ -0,0 +1,104 @@ + + + + + + + + Initialise event list and determine stride boundaries — gdas 0.2.9 documentation + + + + + + + + + + + + + + + +
+ + +
+

Initialise event list and determine stride boundaries

+

First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as lsctables. A pre-defined LSC table can be constructed using the New function from the glue.ligolw.lsctables module. We use the SnglBurstTable function for the type of data to be stored and define all the columns we wish to record.

+
# Create event list for single burst table
+event_list = lsctables.New(lsctables.SnglBurstTable,
+                           ['start_time','start_time_ns','peak_time','peak_time_ns',
+                            'duration','bandwidth','central_freq','chisq_dof',
+                            'confidence','snr','amplitude','channel','ifo',
+                            'process_id','event_id','search','stop_time','stop_time_ns'])
+
+
+

We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively t_idx_min and t_idx_max. The default values are considered to be 0 for the starting index and the segment length in samples for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in samples is determined and added to both the starting and ending time indexes.

+
# Determine boundaries of stride in time domain
+t_idx_min, t_idx_max = 0, seg_len
+# Check if user requested starting time is defined
+if args.analysis_start_time is not None:
+    # Define the time difference in seconds between data and user requested starting times
+    t_idx_off = args.analysis_start_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_off = int(t_idx_off * args.sample_rate)
+else:
+    # Define index of the starting point as first value in data
+    t_idx_off = 0
+# Initialise minimum index values as offset starting index
+t_idx_min += t_idx_off
+# Initialise maximum index values as offset starting index
+t_idx_max += t_idx_off
+
+
+

Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ts_data.

+
# Check if user requested end time is defined
+if args.analysis_end_time is not None:
+    # Define the time difference between data and user requested ending times
+    t_idx_max_off = args.analysis_end_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_max_off = int(t_idx_max_off * args.sample_rate)
+else:
+    # Define index of the ending point as the length of data array
+    t_idx_max_off = len(ts_data)
+
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/example.html b/docs/example.html new file mode 100644 index 0000000..32dfc17 --- /dev/null +++ b/docs/example.html @@ -0,0 +1,113 @@ + + + + + + + + Working Example — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Working Example

+

Either on your own computer or on the server, on a Jupyter notebook or on a Python script, the first thing to do is to import the gdas package that contains all the modules present in the GNOME software. That can be done easily by doing the following:

+
import gdas
+
+
+

In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:

+
station    = 'fribourg01'
+start_time = '2016-11-03-04'
+end_time   = '2016-11-03-04-2'
+
+
+

where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified.

+

If you are not working on the server and the data are located in a different repository than /GNOMEDrive/gnome/serverdata/, a custom path can be defined. For instance:

+
datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/'
+
+
+

The magnetic field data can then be retrieved as follows:

+
ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath)
+
+
+

The gdas.magfield method will return 3 arrays of data that can then be used to produce different plots:

+
gdas.plot_activity(activity)
+gdas.plot_time_series(station,ts_list,seglist=activity)
+gdas.plot_asd(station,ts_list)
+gdas.plot_whitening(station,ts_list,activity)
+
+
+

This is a script to do Excess Power analysis:

+
psd_segment_length = 60
+psd_segment_stride = 30
+psd_estimation     = 'median-mean'
+window_fraction    = 0
+tile_fap           = 1e-5
+channels           = 250
+
+gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels)
+gdas.plot_triggers()
+
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/excess_power.html b/docs/excess_power.html new file mode 100644 index 0000000..bb46aa0 --- /dev/null +++ b/docs/excess_power.html @@ -0,0 +1,637 @@ + + + + + + + + Excess-Power algorithm — gdas 0.2.9 documentation + + + + + + + + + + + + + + + +
+ + +
+

Excess-Power algorithm

+
+

General overview

+

The Excess Power method is known as the optimal detection strategy to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search for Axion-Like Particles (ALP). This method was developed and introduced by Anderson et al. (2001) and has been extensively used in the detection of burst sources of gravitational radiation. More technical documentation was written by Brady et al. (2007) describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code.

+

We present below a step-by-step procedure followed during the Excess Power search analysis. For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane.

+
    +
  • Time domain segmentation and PSD estimate

    +
    +

    We first estimate the instrument’s noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by \(\Delta f\) equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process.

    +
    +
  • +
  • Comb of frequency channels

    +
    +

    We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a \(\Delta f\) determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter’s frequency series.

    +
    +
  • +
  • Creating analysing blocks

    +
    +

    The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for the PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel.

    +
    +
  • +
  • Creating tiles with different bandwidth

    +
    +

    We can now construct tiles with different bandwidth by summing multiple channels together.

    +
    +
  • +
  • Exploring tiles with different duration

    +
    +

    For each given tile bandwidth, one can investigate different tile durations. This can be done by exploring different numbers of degrees of freedom, \(d\), which can be calculated as follows: \(d=2BT\) where \(B\) and \(T\) are respectively the bandwidth and duration of the tile. Section 2.2.5 of Brady et al. gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing \(d\), one can explore multiple tile durations for different bandwidths.

    +
    +
  • +
  • Define triggering signal

    +
    +

    The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile’s energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in Pustelny et al. (2013) can then be made which plots the outlying tile energies present in the data.

    +
    +
  • +
+
+_images/overview.png +

Overview of the Excess Power method and difference between segments, channels, tiles and blocks.

+
+
+
+

Estimate Power Spectral Density (PSD)

+

The instrument’s noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values.

+
# Convert time series as array of float
+data = ts_data.astype(numpy.float64)
+
+
+

The PSD is calculated by splitting up the signal into overlapping segments and scanning through each segment to calculate an individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, avg_method, that will be used to measure the PSD from the data. This can be specified with the --psd-estimation option.

+
# Average method to measure PSD from the data
+avg_method = args.psd_estimation
+
+
+

One also needs to specify the length of each segment, seg_len, as well as the separation between 2 consecutive segments, seg_stride. Both parameters can be defined in second units with the --psd-segment-length and --psd-segment-stride arguments respectively and can then be converted into sample unit.

+
# The segment length for PSD estimation in samples
+seg_len = int(args.psd_segment_length * args.sample_rate)
+# The separation between consecutive segments in samples
+seg_stride = int(args.psd_segment_stride * args.sample_rate)
+
+
+

We then use Welch’s method to perform the power spectral density estimate using the welch module from the pycbc.psd library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, avg_method, and the result is returned as the frequency series, fd_psd, which will store the power measurement for each frequency bin.

+
# Lifted from the psd.from_cli module
+fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride)
+# Plot the power spectral density
+plot_spectrum(fd_psd)
+# We need this for the SWIG functions
+lal_psd = fd_psd.lal()
+
+
+

One can display the power measurements, frequency array and frequency between consecutive samples, \(\Delta f\) in Hertz, by printing the following variables:

+
print 'Display power measurements of the first 10 frequency bins'
+print fd_psd[:10]
+print 'Display central frequency of the first 10 bins'
+print fd_psd.sample_frequencies[:10]
+print 'Display the frequency separation between bins'
+print fd_psd.delta_f
+
+
+

\(\Delta f\) corresponds to the inverse of a segment’s length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate.

+
+
+

Checking filtering settings

+

The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the strain_high_pass argument). If the high-pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated.

+
if args.min_frequency < args.strain_high_pass:
+    print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency)
+
+
+

In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as signals at higher frequencies cannot be reliably identified at this sampling rate.

+
if args.max_frequency is None:
+    args.max_frequency = args.sample_rate / 2.0
+
+
+

If either the bandwidth of the finest filter (--tile-bandwidth argument, see section construct_args) or the number of frequency channels (--channels argument) is not defined but the total spectral band (data_band) is, one can then determine all the filter settings as follows:

+
if args.tile_bandwidth is None and args.channels is None:
+    # Exit program with error message
+    exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane")
+else:
+    # Define as assert statement that tile maximum frequency larger than its minimum frequency
+    assert args.max_frequency >= args.min_frequency
+    # Define spectral band of data
+    data_band = args.max_frequency - args.min_frequency
+    # Check if tile bandwidth or channel is defined
+    if args.tile_bandwidth is not None:
+        # Define number of possible filter bands
+        nchans = args.channels = int(data_band / args.tile_bandwidth)  - 1
+    elif args.channels is not None:
+        # Define filter bandwidth
+        band = args.tile_bandwidth = data_band / (args.channels + 1)
+    assert args.channels > 1
+
+
+

The minimum frequency to be explored can be user-defined by using the --min-frequency option.

+
# Lowest frequency of the first filter
+flow = args.min_frequency
+
+
+
+
+

Whitening window and spectral correlation

+

This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose window_fraction * args.psd_segment_length to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with args.psd_segment_length equal to 8; smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end).

+
window_fraction = 0
+
+
+

The two point spectral correlation is then done with the calculate_spectral_correlation function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window.

+
# Do two point spectral correlation
+window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction)
+window = window.data.data
+window_sigma_sq = numpy.mean(window**2)
+# Pre scale the window by its root mean squared -- see eqn 11 of EP document
+#window /= numpy.sqrt(window_sigma_sq)
+
+
+
+
+

Computing the filter bank

+

The filter bank will create band-pass filters for each channel in the PSD frequency domain. The create_filter_bank function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in an increment equal to the frequency band. The filter’s total extent in Fourier space is actually twice the stated bandwidth (FWHM).

+
# Define filters
+filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr)
+
+
+

This function will return 2 arrays: the filter_bank array which is a list of COMPLEX16FrequencySeries arrays corresponding to each channel’s filter, and the fdb array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. nchans). The filter’s data, \(\Delta f\) value, and first and last frequencies of any channel’s filter can be displayed as follows:

+
# Print data of first channel's filter
+print filter_bank[0].data.data
+# Print frequency separation between 2 values in the first channel's filter
+print filter_bank[0].deltaF
+# Print first frequency of the first channel's filter
+print filter_bank[0].f0
+# Print last frequency of the first channel's filter (equal to twice the channel's bandwidth)
+print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF
+
+
+

Further in the analysis, the following filters will be used: +1. white_filter_ip: Whitened filter inner products computed with themselves. +2. unwhite_filter_ip: Unwhitened filter inner products computed with themselves. +3. white_ss_ip: Whitened filter inner products computed between input adjacent filters. +4. unwhite_ss_ip: Unwhitened filter inner products computed between input adjacent filters.

+
# This is necessary to compute the mu^2 normalizations
+white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None)
+unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd)
+# These two are needed for the unwhitened mean square sum (hrss)
+white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None)
+unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd)
+
+
+
+
+

Normalization of virtual channel

+

The virtual channels will be used during the Excess Power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the compute_channel_renomalization internal function.

+
# Initialise dictionary
+mu_sq_dict = {}
+# nc_sum additional channel adds
+for nc_sum in range(0, int(math.log(nchans, 2))):
+    min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2
+    print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band)
+    nc_sum = 2**nc_sum - 1
+    mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans)
+
+
+
+
+

Initialise event list and determine stride boundaries

+

First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as lsctables. A pre-defined LSC table can be constructed using the New function from the glue.ligolw.lsctables module. We use the SnglBurstTable function for the type of data to be stored and define all the columns we wish to record.

+
# Create event list for single burst table
+event_list = lsctables.New(lsctables.SnglBurstTable,
+                           ['start_time','start_time_ns','peak_time','peak_time_ns',
+                            'duration','bandwidth','central_freq','chisq_dof',
+                            'confidence','snr','amplitude','channel','ifo',
+                            'process_id','event_id','search','stop_time','stop_time_ns'])
+
+
+

We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively t_idx_min and t_idx_max. The default values are considered to be 0 for the starting index and the segment length in sample units for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in sample units is determined and added to both starting and ending time indexes.

+
# Determine boundaries of stride in time domain
+t_idx_min, t_idx_max = 0, seg_len
+# Check if user requested starting time is defined
+if args.analysis_start_time is not None:
+    # Define the time difference in seconds between data and user requested starting times
+    t_idx_off = args.analysis_start_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_off = int(t_idx_off * args.sample_rate)
+else:
+    # Define index of the starting point as first value in data
+    t_idx_off = 0
+# Initialise minimum index values as offset starting index
+t_idx_min += t_idx_off
+# Initialise maximum index values as offset starting index
+t_idx_max += t_idx_off
+
+
+

Finally, the index for the ending time after all the segments have been analysed can be estimated from the user-defined parameter or, if none is given, is defined as the length of the time series data ts_data.

+
# Check if user requested end time is defined
+if args.analysis_end_time is not None:
+    # Define the time difference between data and user requested ending times
+    t_idx_max_off = args.analysis_end_time - ts_data.start_time
+    # Calculate the index of the user requested starting point in the data
+    t_idx_max_off = int(t_idx_max_off * args.sample_rate)
+else:
+    # Define index of the ending point as the length of data array
+    t_idx_max_off = len(ts_data)
+
+
+
+
+

Define analysing blocks

+

The first thing we do is to calculate the time series for the segment that is covered (tmp_ts_data) and redefine the metadata, especially the time of the first sample in seconds which is defined by the epoch argument and is different for every segment. After plotting the time series for that segment, the data are then converted into a frequency series (fs_data) using the to_frequencyseries method of the pycbc.types.timeseries.TimeSeries class. Finally, the frequency data are then whitened.

+
# Loop over each data within the user requested time period
+while t_idx_max <= t_idx_max_off:
+    # Define starting and ending time of the segment in seconds
+    start_time = ts_data.start_time + t_idx_min/float(args.sample_rate)
+    end_time = ts_data.start_time + t_idx_max/float(args.sample_rate)
+    print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off))
+    # Model a withen time series for the block
+    tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time)
+    # Save time series in segment repository
+    segfolder = 'segments/%i-%i'%(start_time,end_time)
+    os.system('mkdir -p '+segfolder)
+    plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder))
+    # Convert times series to frequency series
+    fs_data = tmp_ts_data.to_frequencyseries()
+    print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2
+    # Whitening (FIXME: Whiten the filters, not the data)
+    fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f)
+    print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2
+
+
+
+
+

Create time-frequency map for each block

+

We initialise a 2D zero array for a time-frequency map (tf_map) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the nchans variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the seg_len variable.

+
# Initialise 2D zero array for time-frequency map
+tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128)
+
+
+

We also initialise a zero vector for a temporary filter bank (tmp_filter_bank) that will store, for a given channel, the filter’s values from the original filter bank (filter_bank) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (fd_psd).

+
# Initialise 1D zero array
+tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128)
+
+
+

We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel’s filter from the original filter bank.

+
# Loop over all the channels
+print tprint(t0,t1),"Filtering all %d channels..." % nchans
+for i in range(nchans):
+    # Reset filter bank series
+    tmp_filter_bank *= 0.0
+    # Index of starting frequency
+    f1 = int(filter_bank[i].f0/fd_psd.delta_f)
+    # Index of ending frequency
+    f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1
+    # (FIXME: Why is there a factor of 2 here?)
+    tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2
+
+
+

We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel.

+
# Define the template to filter the frequency series with
+template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False)
+
+
+

Finally, we use the matched_filter_core module from the pycbc.filter.matchedfilter library to filter the frequency series from the channel. This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector.

+
# Create filtered series
+filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None,
+                                             low_frequency_cutoff=filter_bank[i].f0,
+                                             high_frequency_cutoff=filter_bank[i].f0+2*band)
+
+
+

The matched filter is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed.

+
# Include filtered series in the map
+tf_map[i,:] = filtered_series[0].numpy()
+
+
+

The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of samples present in one segment of data, i.e. the segment’s length in seconds times the sampling rate. The map can finally be plotted with a \(\Delta t\) corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and \(\Delta f\) is equal to the bandwidth of one channel.

+
plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder))
+
+
+
+
+

Constructing tiles of different bandwidth

+

First and foremost, we define a clipping region in the data to be used to remove window corruption, this is non-zero if the window_fraction variable is set to a non-zero value.

+
print tprint(t0,t1),"Beginning tile construction..."
+# Clip the boundaries to remove window corruption
+clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2)
+
+
+

In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. We first need to set up a loop such that the maximum number of additional channels is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (nc_sum) would therefore be equal to 2 to the power of the current quantity of additional channels.

+
for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds
+    nc_sum = 2**nc_sum - 1
+    print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1)
+
+
+

The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to band * (nc_sum + 1).

+
us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t)))
+print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate)
+
+
+

“Virtual” wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time frequency map for this tile using the make_indp_tiles internal function. In other words, we are constructing multiple sub-tiles for which we can determine the respective energy in the given frequency band.

+
mu_sq = mu_sq_dict[nc_sum]
+sys.stderr.write("\t...calculating tiles...")
+if clip_samples > 0:
+    tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq)
+else:
+    tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq)
+sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape)
+print >>sys.stderr, " done"
+print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles))
+
+
+
+
+

Explore multiple tile durations

+

Now that we have created a tile with a specific bandwidth, we can start exploring different durations for the tile. We start by checking whether the user manually defined a value for the longest duration tile to compute, which can be done using the --max-duration argument. If not, the maximum number of degrees of freedom (max_dof) will be set to 32.

+
if args.max_duration is not None:
+    max_dof = 2 * args.max_duration * (band * (nc_sum+1))
+else:
+    max_dof = 32
+assert max_dof >= 2
+
+
+

Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth.

+
print "\t\t...getting longer durations..."
+#for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]:
+for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]:
+    sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j))
+    #tlen = tiles.shape[1] - j + 1
+    tlen = tiles.shape[1] - 2*j + 1 + 1
+    if tlen <= 0:
+        print >>sys.stderr, " ...not enough samples."
+        continue
+    dof_tiles = numpy.zeros((tiles.shape[0], tlen))
+    #:sum_filter = numpy.ones(j)
+    # FIXME: This is the correct filter for 50% overlap
+    sum_filter = numpy.array([1,0] * (j-1) + [1])
+    #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1])
+    for f in range(tiles.shape[0]):
+        # Sum and drop correlate tiles
+        # FIXME: don't drop correlated tiles
+        #output = numpy.convolve(tiles[f,:], sum_filter, 'valid')
+        dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid')
+    print >>sys.stderr, " done"
+    print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles))
+    level_tdiff = time.time() - tdiff
+    print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff
+
+
+

Finally, the bandwidth and duration of the tile can be defined as follows:

+
# Current bandwidth of the time-frequency map tiles
+current_band = band * (nc_sum + 1)
+# How much each "step" is in the frequency domain -- almost
+# assuredly the fundamental bandwidth
+df = current_band
+# How much each "step" is in the time domain -- under sampling rate
+# FIXME: THis won't work if the sample rate isn't a power of 2
+dt = 1.0 / 2 / (2 * current_band) * 2
+full_band = 250
+dt = current_band / full_band * ts_data.sample_rate
+dt = 1.0/dt
+# Duration is fixed by the NDOF and bandwidth
+duration = j / 2.0 / current_band
+
+
+
+
+

Trigger finding

+

In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such a threshold can be determined by using the inverse survival function method from the scipy.stats.chi2 package.

+
threshold = scipy.stats.chi2.isf(args.tile_fap, j)
+print "Threshold for this level: %f" % threshold
+#if numpy.any(dof_tiles > threshold):
+    #plot_spectrogram(dof_tiles.T)
+    #import pdb; pdb.set_trace()
+
+
+

Once the threshold is set, one can then run the trigger_list_from_map function to quickly find the tiles in the dof_tiles array that exceed the threshold:

+
# Since we clip the data, the start time needs to be adjusted accordingly
+window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2
+trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None)
+for event in event_list[::-1]:
+    if event.amplitude != None:
+        continue
+    etime_min_idx = float(event.get_start()) - float(fs_data.epoch)
+    etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t)
+    etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration
+    etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t)
+    # (band / 2) to account for sin^2 wings from finest filters
+    flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band)
+    fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band)
+    # TODO: Check that the undersampling rate is always commensurate
+    # with the indexing: that is to say that
+    # mod(etime_min_idx, us_rate) == 0 always
+    z_j_b = tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate]
+    # FIXME: Deal with negative hrss^2 -- e.g. remove the event
+    try:
+        event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof)
+    except ValueError:
+        event.amplitude = 0
+
+print "Total number of events: %d" % len(event_list)
+
+
+
+
+

Switch to new block

+

The following will move the frequency band to the next segment:

+
tdiff = time.time() - tdiff
+print "Done with this block: total %f" % tdiff
+
+t_idx_min += int(seg_len * (1 - window_fraction))
+t_idx_max += int(seg_len * (1 - window_fraction))
+
+
+
+
+

Extracting GPS time range

+

We use the LIGOTimeGPS structure from the glue.lal package to store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference. Once both times are defined, the range of values is stored in a semi-open interval using the segment module from the glue.segments package.

+
# Starting epoch relative to GPS starting epoch
+start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time)
+# Ending epoch relative to GPS ending epoch
+end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time)
+# Represent the range of values in the semi-open interval
+inseg = segment(start_time,end_time)
+
+
+
+
+

Prepare output file for given time range

+
xmldoc = ligolw.Document()
+xmldoc.appendChild(ligolw.LIGO_LW())
+
+ifo = args.channel_name.split(":")[0]
+proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date)
+
+# Figure out the data we actually analyzed
+outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction)
+
+ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg)
+
+for sb in event_list:
+    sb.process_id = proc_row.process_id
+    sb.search = proc_row.program
+    #sb.ifo, sb.channel = args.channel_name.split(":")
+    sb.ifo, sb.channel = station, setname
+
+xmldoc.childNodes[0].appendChild(event_list)
+fname = make_filename(station, inseg)
+
+utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True)
+
+
+
+
+

Plot trigger results

+
events = SnglBurstTable.read(fname+'.gz')
+#del events[10000:]
+plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr")
+#plot = events.plot('time', 'central_freq', color='snr')
+#plot.set_yscale("log")
+plot.set_ylim(1e-0, 250)
+t0 = 1153742417
+plot.set_xlim(t0 + 0*60, t0 + 1*60)
+#plot.set_xlim(t0 + 28, t0 + 32)
+pyplot.axvline(t0 + 30, color='r')
+cb = plot.add_colorbar(cmap='viridis')
+plot.savefig("triggers.png")
+
+
+
+
+
+

Module Access

+
+

Extract Magnetic Field Data

+

Extract magnetic field data from HDF5 files.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + +
magfield(station, starttime, endtime[, ...])Glob all files within user-defined period and extract data.
file_to_segment(hfile, segname)Define length of data segment.
construct_utc_from_metadata(datestr, t0str)
generate_timeseries(data_list[, setname])Generate time series using list of HDF5 data file paths
create_activity_list(station, data_order)Create consecutive list of available data segment.
retrieve_data_timeseries(hfile, setname)Retrieve data time series from HDF5 data file
retrieve_channel_data(hfile, setname)Retrieve the data from specific channel
+
+
+

Plotting routines

+

Methods to produce time-frequency plots and others

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
plot_activity(full_seglist)Plot full activity period for station.
plot_time_series(station, ts_list[, seglist, hp])Generate a plot of the whole data time series
plot_asd(station, ts_list)Plot Amplitude Spectral Density.
plot_whitening(station, ts_list[, seglist])Generate a spectrogram plot and normalized spectrogram
plot_ts(ts[, fname])
plot_spectrum(fd_psd)
plot_spectrogram(spec, dt, df, sample_rate, ...)
plot_spectrogram_from_ts(ts)
plot_triggers()
+
+
+

Utilities

+

Independent routines to do various other things

+ ++++ + + + + + +
create_sound(ts)Create sound based on the data
+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/generated/gdas.epower.calculate_psd.html b/docs/generated/gdas.epower.calculate_psd.html index ba6b78a..63d1828 100644 --- a/docs/generated/gdas.epower.calculate_psd.html +++ b/docs/generated/gdas.epower.calculate_psd.html @@ -27,8 +27,8 @@ - - + + diff --git a/docs/generated/gdas.epower.calculate_spectral_correlation.html b/docs/generated/gdas.epower.calculate_spectral_correlation.html index 3779832..7e7f4a6 100644 --- a/docs/generated/gdas.epower.calculate_spectral_correlation.html +++ b/docs/generated/gdas.epower.calculate_spectral_correlation.html @@ -27,8 +27,8 @@ - - + + diff --git a/docs/generated/gdas.epower.check_filtering_settings.html b/docs/generated/gdas.epower.check_filtering_settings.html index 7f4110a..055a738 100644 --- a/docs/generated/gdas.epower.check_filtering_settings.html +++ b/docs/generated/gdas.epower.check_filtering_settings.html @@ -27,8 +27,8 @@ - - + + diff --git a/docs/generated/gdas.epower.compute_channel_renormalization.html b/docs/generated/gdas.epower.compute_channel_renormalization.html index 67ac886..eecf200 100644 --- a/docs/generated/gdas.epower.compute_channel_renormalization.html +++ b/docs/generated/gdas.epower.compute_channel_renormalization.html @@ -27,8 +27,7 @@ - - + @@ -64,11 +61,9 @@

gdas.epower.compute_channel_renormalization

- «  gdas.epower.compute_filter_ips_adjacent + «  Excess Power - Step 5: Normalization of virtual channel   ::   Contents -   ::   - gdas.epower.measure_hrss  »

diff --git a/docs/generated/gdas.epower.compute_filter_ips_adjacent.html b/docs/generated/gdas.epower.compute_filter_ips_adjacent.html index f075303..3c1f5bd 100644 --- a/docs/generated/gdas.epower.compute_filter_ips_adjacent.html +++ b/docs/generated/gdas.epower.compute_filter_ips_adjacent.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.compute_filter_ips_self.html b/docs/generated/gdas.epower.compute_filter_ips_self.html index 25cc01d..512e9de 100644 --- a/docs/generated/gdas.epower.compute_filter_ips_self.html +++ b/docs/generated/gdas.epower.compute_filter_ips_self.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.construct_tiles.html b/docs/generated/gdas.epower.construct_tiles.html index 3ecf4e9..9af333b 100644 --- a/docs/generated/gdas.epower.construct_tiles.html +++ b/docs/generated/gdas.epower.construct_tiles.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.convert_to_time_domain.html b/docs/generated/gdas.epower.convert_to_time_domain.html index d296435..6f7aab7 100644 --- a/docs/generated/gdas.epower.convert_to_time_domain.html +++ b/docs/generated/gdas.epower.convert_to_time_domain.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.create_filter_bank.html b/docs/generated/gdas.epower.create_filter_bank.html index 4117d4c..930d46e 100644 --- a/docs/generated/gdas.epower.create_filter_bank.html +++ b/docs/generated/gdas.epower.create_filter_bank.html @@ -27,8 +27,8 @@ - - + + diff --git a/docs/generated/gdas.epower.create_tf_plane.html b/docs/generated/gdas.epower.create_tf_plane.html index 95ce797..34ac521 100644 --- a/docs/generated/gdas.epower.create_tf_plane.html +++ b/docs/generated/gdas.epower.create_tf_plane.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.create_tile_duration.html b/docs/generated/gdas.epower.create_tile_duration.html index e6ed49a..b38be43 100644 --- a/docs/generated/gdas.epower.create_tile_duration.html +++ 
b/docs/generated/gdas.epower.create_tile_duration.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.create_xml.html b/docs/generated/gdas.epower.create_xml.html index 30e4d0e..931ad91 100644 --- a/docs/generated/gdas.epower.create_xml.html +++ b/docs/generated/gdas.epower.create_xml.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.determine_output_segment.html b/docs/generated/gdas.epower.determine_output_segment.html index cf9a9b2..64a15e6 100644 --- a/docs/generated/gdas.epower.determine_output_segment.html +++ b/docs/generated/gdas.epower.determine_output_segment.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.excess_power.html b/docs/generated/gdas.epower.excess_power.html index 8aef405..3e62857 100644 --- a/docs/generated/gdas.epower.excess_power.html +++ b/docs/generated/gdas.epower.excess_power.html @@ -27,8 +27,8 @@ - - + + diff --git a/docs/generated/gdas.epower.identify_block.html b/docs/generated/gdas.epower.identify_block.html index 6bcd820..583785f 100644 --- a/docs/generated/gdas.epower.identify_block.html +++ b/docs/generated/gdas.epower.identify_block.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.make_filename.html b/docs/generated/gdas.epower.make_filename.html index 38d3a10..51e44b7 100644 --- a/docs/generated/gdas.epower.make_filename.html +++ b/docs/generated/gdas.epower.make_filename.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.make_indp_tiles.html b/docs/generated/gdas.epower.make_indp_tiles.html index 1088be5..306346b 100644 --- a/docs/generated/gdas.epower.make_indp_tiles.html +++ b/docs/generated/gdas.epower.make_indp_tiles.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.make_tiles.html b/docs/generated/gdas.epower.make_tiles.html index 1ef4e56..cbc4e30 100644 --- a/docs/generated/gdas.epower.make_tiles.html +++ b/docs/generated/gdas.epower.make_tiles.html @@ -26,9 +26,7 @@ - - - + diff --git 
a/docs/generated/gdas.epower.measure_hrss.html b/docs/generated/gdas.epower.measure_hrss.html index ce4110b..fea67ad 100644 --- a/docs/generated/gdas.epower.measure_hrss.html +++ b/docs/generated/gdas.epower.measure_hrss.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.measure_hrss_poorly.html b/docs/generated/gdas.epower.measure_hrss_poorly.html index 4e3ef94..de41184 100644 --- a/docs/generated/gdas.epower.measure_hrss_poorly.html +++ b/docs/generated/gdas.epower.measure_hrss_poorly.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.measure_hrss_slowly.html b/docs/generated/gdas.epower.measure_hrss_slowly.html index eb6e1ed..40fa3ea 100644 --- a/docs/generated/gdas.epower.measure_hrss_slowly.html +++ b/docs/generated/gdas.epower.measure_hrss_slowly.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.epower.trigger_list_from_map.html b/docs/generated/gdas.epower.trigger_list_from_map.html index ff667b9..213dbb7 100644 --- a/docs/generated/gdas.epower.trigger_list_from_map.html +++ b/docs/generated/gdas.epower.trigger_list_from_map.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_activity.html b/docs/generated/gdas.plots.plot_activity.html index 15671f4..612b678 100644 --- a/docs/generated/gdas.plots.plot_activity.html +++ b/docs/generated/gdas.plots.plot_activity.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_asd.html b/docs/generated/gdas.plots.plot_asd.html index d82476e..d5eef35 100644 --- a/docs/generated/gdas.plots.plot_asd.html +++ b/docs/generated/gdas.plots.plot_asd.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_spectrogram.html b/docs/generated/gdas.plots.plot_spectrogram.html index 95f9815..680d8c0 100644 --- a/docs/generated/gdas.plots.plot_spectrogram.html +++ b/docs/generated/gdas.plots.plot_spectrogram.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_spectrogram_from_ts.html 
b/docs/generated/gdas.plots.plot_spectrogram_from_ts.html index ab9e77c..88857cf 100644 --- a/docs/generated/gdas.plots.plot_spectrogram_from_ts.html +++ b/docs/generated/gdas.plots.plot_spectrogram_from_ts.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_spectrum.html b/docs/generated/gdas.plots.plot_spectrum.html index 0eea3b4..4ac607f 100644 --- a/docs/generated/gdas.plots.plot_spectrum.html +++ b/docs/generated/gdas.plots.plot_spectrum.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_time_series.html b/docs/generated/gdas.plots.plot_time_series.html index 5220764..96be31f 100644 --- a/docs/generated/gdas.plots.plot_time_series.html +++ b/docs/generated/gdas.plots.plot_time_series.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_triggers.html b/docs/generated/gdas.plots.plot_triggers.html index 9060066..1e4092f 100644 --- a/docs/generated/gdas.plots.plot_triggers.html +++ b/docs/generated/gdas.plots.plot_triggers.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_ts.html b/docs/generated/gdas.plots.plot_ts.html index 5579fa8..896a2c8 100644 --- a/docs/generated/gdas.plots.plot_ts.html +++ b/docs/generated/gdas.plots.plot_ts.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.plots.plot_whitening.html b/docs/generated/gdas.plots.plot_whitening.html index d00f321..054ff24 100644 --- a/docs/generated/gdas.plots.plot_whitening.html +++ b/docs/generated/gdas.plots.plot_whitening.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.retrieve.construct_utc_from_metadata.html b/docs/generated/gdas.retrieve.construct_utc_from_metadata.html index 2d0ea31..b286c00 100644 --- a/docs/generated/gdas.retrieve.construct_utc_from_metadata.html +++ b/docs/generated/gdas.retrieve.construct_utc_from_metadata.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.retrieve.create_activity_list.html 
b/docs/generated/gdas.retrieve.create_activity_list.html index f894f27..7e09d8e 100644 --- a/docs/generated/gdas.retrieve.create_activity_list.html +++ b/docs/generated/gdas.retrieve.create_activity_list.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.retrieve.file_to_segment.html b/docs/generated/gdas.retrieve.file_to_segment.html index 21d3d80..51838ec 100644 --- a/docs/generated/gdas.retrieve.file_to_segment.html +++ b/docs/generated/gdas.retrieve.file_to_segment.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.retrieve.generate_timeseries.html b/docs/generated/gdas.retrieve.generate_timeseries.html index 6b4b857..d0fff85 100644 --- a/docs/generated/gdas.retrieve.generate_timeseries.html +++ b/docs/generated/gdas.retrieve.generate_timeseries.html @@ -26,9 +26,7 @@ - - - + diff --git a/docs/generated/gdas.retrieve.magfield.html b/docs/generated/gdas.retrieve.magfield.html index e161a50..3d91b93 100644 --- a/docs/generated/gdas.retrieve.magfield.html +++ b/docs/generated/gdas.retrieve.magfield.html @@ -26,9 +26,7 @@ - - - + -
-

Plotting routines

-

Methods to produce time-frequency plots and others

- ---- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
plot_activity(full_seglist)Plot full activity period for station.
plot_time_series(station, ts_list[, seglist, hp])Generate a plot of the whole data time series
plot_asd(station, ts_list)Plot Amplitude Spectral Density.
plot_whitening(station, ts_list[, seglist])Generate a spectrogram plot and normalized spectrogram
plot_ts(ts[, fname])
plot_spectrum(fd_psd)
plot_spectrogram(spec, dt, df, sample_rate, ...)
plot_spectrogram_from_ts(ts)
plot_triggers()

Excess Power Search Analysis

-

Main class to do excess-power search analysis

- ---- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
excess_power(ts_data, psd_segment_length, ...)Perform excess-power search analysis on magnetic field data.
check_filtering_settings(sample_rate, ...)Check filtering settings and define the total number of channels and bandwidth to use for filter bank.
calculate_psd(ts_data, sample_rate, ...)Estimate Power Spectral Density (PSD)
calculate_spectral_correlation(fft_window_len)Calculate the two point spectral correlation introduced by windowing the data before transforming to the frequency domain – valid choices are ‘hann’ and ‘tukey’.
create_filter_bank(delta_f, flow, band, ...)Create filter bank
convert_to_time_domain(fdb, sample_rate)Convert filter bank from frequency to time domain
identify_block(ts_data, fd_psd, window, ...)Get frequency series of the current block
create_tf_plane(fd_psd, nchans, seg_len, ...)Create time-frequency map
compute_filter_ips_self(lal_filters, spec_corr)Compute a set of inner products of input filters with themselves.
compute_filter_ips_adjacent(lal_filters, ...)Compute a set of filter inner products between input adjacent filters.
compute_channel_renormalization(filter_bank, ...)Compute the renormalization for the base filters up to a given bandwidth.
measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, ...)Approximation of unwhitened sum of squares signal energy in a given EP tile.
measure_hrss_slowly(z_j_b, lal_filters, ...)Approximation of unwhitened sum of squares signal energy in a given EP tile.
measure_hrss_poorly(tile_energy, sub_psd)
trigger_list_from_map(tfmap, event_list, ...)
determine_output_segment(inseg, dt_stride, ...)Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window.
make_tiles(tf_map, nc_sum, mu_sq)
make_indp_tiles(tf_map, nc_sum, mu_sq)Create a time frequency map with resolution of tf_map binning divided by nc_sum + 1.
make_filename(ifo, seg[, tag, ext])
construct_tiles(nc_sum, mu_sq, band, ...)Constructing tile and calculate their energy
create_tile_duration(j, df, duration, tiles)
create_xml(ts_data, psd_segment_length, ...)
+
-
-

Utilities

-

Independent routines to do various other things

- ---- - - - - - -
create_sound(ts)Create sound based on the data
-
@@ -1177,7 +94,7 @@

UtilitiesContents   ::   - gdas.retrieve.magfield  » + Installation  »

diff --git a/docs/installation.html b/docs/installation.html new file mode 100644 index 0000000..d3380fe --- /dev/null +++ b/docs/installation.html @@ -0,0 +1,123 @@ + + + + + + + + Installation — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Installation

+

The program requires the following general packages to run: Numpy, Matplotlib, Scipy and Astropy. The following LIGO-related packages are also required for full functionality: Gwpy, PyCBC, Glue, LAL, LALburst and LALsimulation.

+

While most of the packages can be installed automatically using pip, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called LALsuite which can be installed fairly easily on Debian (Linux) and Mac OS machines.

+
+

LALsuite tools

+

Some useful pages on how to download and install the LIGO software can be found here.

+
+

MacPorts (Mac)

+

For Mac users, the installation is pretty easy; detailed information can be found on this page. You need to have MacPorts installed. The following commands should suffice to install the LALsuite package on your machine:

+
sudo port install lscsoft-deps
+sudo port install glue
+sudo port install lalapps
+
+
+

The first command installs all the dependencies required by the LIGO software. The following two commands install the actual packages.

+
+
+

apt-get (Debian)

+

Since the LIGO software is not a default package in the apt package manager system on Debian machines, additional steps will be needed. The first step is to add the following lines to the source list located at /etc/apt/sources.list:

+
deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib
+deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib
+
+
+

Note that the [arch=amd64] option is needed to avoid an architecture problem in case apt tries to install the i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:

+
apt-get install build-essential automake autoconf libtool devscripts
+
+
+

The LIGO software can finally be installed using the following command:

+
apt-get install lscsoft-all
+
+
+
+
+
+

Main Program

+

The best way to install the GNOME software along with the rest of the dependencies is by using pip:

+
pip install gdas
+
+
+

(You may need to put a sudo in front of this). For this to work +you need to have pip installed. This +method allows for easy uninstallation.

+

You can also simply download the tarball from the PyPI website, unpack it and then do:

+
python setup.py install
+
+
+

The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. +The development version can be downloaded from here.

+
+
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/objects.inv b/docs/objects.inv index 8eee2aa..d19b65c 100644 Binary files a/docs/objects.inv and b/docs/objects.inv differ diff --git a/docs/searchindex.js b/docs/searchindex.js index 9225ddb..dfe67dc 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["generated/gdas.epower.calculate_psd","generated/gdas.epower.calculate_spectral_correlation","generated/gdas.epower.check_filtering_settings","generated/gdas.epower.compute_channel_renormalization","generated/gdas.epower.compute_filter_ips_adjacent","generated/gdas.epower.compute_filter_ips_self","generated/gdas.epower.construct_tiles","generated/gdas.epower.convert_to_time_domain","generated/gdas.epower.create_filter_bank","generated/gdas.epower.create_tf_plane","generated/gdas.epower.create_tile_duration","generated/gdas.epower.create_xml","generated/gdas.epower.determine_output_segment","generated/gdas.epower.excess_power","generated/gdas.epower.identify_block","generated/gdas.epower.make_filename","generated/gdas.epower.make_indp_tiles","generated/gdas.epower.make_tiles","generated/gdas.epower.measure_hrss","generated/gdas.epower.measure_hrss_poorly","generated/gdas.epower.measure_hrss_slowly","generated/gdas.epower.trigger_list_from_map","generated/gdas.plots.plot_activity","generated/gdas.plots.plot_asd","generated/gdas.plots.plot_spectrogram","generated/gdas.plots.plot_spectrogram_from_ts","generated/gdas.plots.plot_spectrum","generated/gdas.plots.plot_time_series","generated/gdas.plots.plot_triggers","generated/gdas.plots.plot_ts","generated/gdas.plots.plot_whitening","generated/gdas.retrieve.construct_utc_from_metadata","generated/gdas.retrieve.create_activity_list","generated/gdas.retrieve.file_to_segment","generated/gdas.retrieve.generate_timeseries","generated/gdas.retrieve.magfield","generated/gdas.retrieve.retrieve_channel_data","generated/gdas.retrieve.retrieve_data_timeseries","generated/gdas.utils.cr
eate_sound","index"],envversion:50,filenames:["generated/gdas.epower.calculate_psd.rst","generated/gdas.epower.calculate_spectral_correlation.rst","generated/gdas.epower.check_filtering_settings.rst","generated/gdas.epower.compute_channel_renormalization.rst","generated/gdas.epower.compute_filter_ips_adjacent.rst","generated/gdas.epower.compute_filter_ips_self.rst","generated/gdas.epower.construct_tiles.rst","generated/gdas.epower.convert_to_time_domain.rst","generated/gdas.epower.create_filter_bank.rst","generated/gdas.epower.create_tf_plane.rst","generated/gdas.epower.create_tile_duration.rst","generated/gdas.epower.create_xml.rst","generated/gdas.epower.determine_output_segment.rst","generated/gdas.epower.excess_power.rst","generated/gdas.epower.identify_block.rst","generated/gdas.epower.make_filename.rst","generated/gdas.epower.make_indp_tiles.rst","generated/gdas.epower.make_tiles.rst","generated/gdas.epower.measure_hrss.rst","generated/gdas.epower.measure_hrss_poorly.rst","generated/gdas.epower.measure_hrss_slowly.rst","generated/gdas.epower.trigger_list_from_map.rst","generated/gdas.plots.plot_activity.rst","generated/gdas.plots.plot_asd.rst","generated/gdas.plots.plot_spectrogram.rst","generated/gdas.plots.plot_spectrogram_from_ts.rst","generated/gdas.plots.plot_spectrum.rst","generated/gdas.plots.plot_time_series.rst","generated/gdas.plots.plot_triggers.rst","generated/gdas.plots.plot_ts.rst","generated/gdas.plots.plot_whitening.rst","generated/gdas.retrieve.construct_utc_from_metadata.rst","generated/gdas.retrieve.create_activity_list.rst","generated/gdas.retrieve.file_to_segment.rst","generated/gdas.retrieve.generate_timeseries.rst","generated/gdas.retrieve.magfield.rst","generated/gdas.retrieve.retrieve_channel_data.rst","generated/gdas.retrieve.retrieve_data_timeseries.rst","generated/gdas.utils.create_sound.rst","index.rst"],objects:{"gdas.epower":{calculate_psd:[0,0,1,""],calculate_spectral_correlation:[1,0,1,""],check_filtering_settings:[2,0,1,""],co
mpute_channel_renormalization:[3,0,1,""],compute_filter_ips_adjacent:[4,0,1,""],compute_filter_ips_self:[5,0,1,""],construct_tiles:[6,0,1,""],convert_to_time_domain:[7,0,1,""],create_filter_bank:[8,0,1,""],create_tf_plane:[9,0,1,""],create_tile_duration:[10,0,1,""],create_xml:[11,0,1,""],determine_output_segment:[12,0,1,""],excess_power:[13,0,1,""],identify_block:[14,0,1,""],make_filename:[15,0,1,""],make_indp_tiles:[16,0,1,""],make_tiles:[17,0,1,""],measure_hrss:[18,0,1,""],measure_hrss_poorly:[19,0,1,""],measure_hrss_slowly:[20,0,1,""],trigger_list_from_map:[21,0,1,""]},"gdas.plots":{plot_activity:[22,0,1,""],plot_asd:[23,0,1,""],plot_spectrogram:[24,0,1,""],plot_spectrogram_from_ts:[25,0,1,""],plot_spectrum:[26,0,1,""],plot_time_series:[27,0,1,""],plot_triggers:[28,0,1,""],plot_ts:[29,0,1,""],plot_whitening:[30,0,1,""]},"gdas.retrieve":{construct_utc_from_metadata:[31,0,1,""],create_activity_list:[32,0,1,""],file_to_segment:[33,0,1,""],generate_timeseries:[34,0,1,""],magfield:[35,0,1,""],retrieve_channel_data:[36,0,1,""],retrieve_data_timeseries:[37,0,1,""]},"gdas.utils":{create_sound:[38,0,1,""]}},objnames:{"0":["py","function","Python 
function"]},objtypes:{"0":"py:function"},terms:{"case":39,"class":39,"default":[12,39],"final":[33,39],"float":[0,2,7,8,13,14,39],"function":[20,33,39],"import":39,"int":[2,13,35,39],"return":[4,5,12,34,39],"true":39,"try":39,"var":39,"while":39,And:23,For:39,GPS:[31,35],One:39,Such:39,That:39,The:[1,4,16,33,39],These:39,__dict__:39,__program__:39,abl:39,abov:39,abs:39,absenc:39,acced:39,accordingli:39,account:39,activ:[22,35],actual:39,add:39,add_colorbar:39,add_state_seg:39,added:39,addit:[16,39],adjac:[4,18,39],adjust:39,afach:39,after:39,against:39,agg:[23,39],alarm:[13,39],all:[13,16,32,35,39],allan:39,allow:39,almost:39,along:39,alp:39,alreadi:39,also:39,alwai:39,amd64:39,amelior:[23,39],amount:12,amplitud:23,analys:[13,35],analysi:13,analysis_end_tim:39,analysis_start_tim:39,analyz:39,anderson:39,ani:39,append_search_summari:39,appendchild:39,appli:[16,39],applic:39,approxim:[18,20],arch:39,architectur:39,area:18,arg:39,argument:[4,5,39],around:39,arrai:[4,9,34,39],ascii:39,assert:39,associ:39,assum:[4,12],assuredli:39,astro:39,astropi:39,astyp:39,attenu:39,attr:39,attribut:[33,36,37],audio:39,autoconf:39,automak:39,automat:39,avail:[22,32,34,39],averag:[0,13,39],avg_method:39,axi:39,axion:39,axvlin:39,band:[6,8,9,13,21,39],bandpass:39,bandwidth:[2,3,8,13],bank:[2,7,8,13],base:[3,38],basic:39,becom:39,been:39,befor:[1,39],beforehand:39,begin:39,being:39,below:39,best:39,better:39,between:[0,4,13,39],big:39,bigger:39,bin:[16,18,39],bit:39,block:[12,14,18],blue:39,both:39,bradi:39,branch:39,browser:39,budker:39,build:39,bunch:13,burst:39,c0aonvert:39,calcul:[1,6,33,39],calculate_spectral_correl:39,call:39,can:[12,16,39],caus:39,caveat:16,ceil:39,central:39,central_freq:39,chang:39,channel:[2,13,36,37],channel_nam:39,check:2,chi2:39,childnod:39,chisq_dof:39,choic:1,chose:39,chri:0,chunk:39,click:39,clip:39,clip_sampl:39,close:39,cmap:39,coalesc:39,code:39,collabor:39,color:39,column:39,comb:39,command:39,commensur:39,commonli:39,compar:39,compil:39,complain:[23,
39],complet:39,complex128:39,complex16frequencyseri:39,complex:[16,23,39],complic:39,comput:[3,4,5],compute_channel_renom:39,compute_filter_ips_adjac:39,compute_filter_ips_self:39,condit:39,confid:39,consecut:[0,13,32,39],consid:39,consist:39,construct:[6,18,31],construct_arg:39,construct_utc_from_metadata:33,contact:0,contain:39,continu:[22,39],contrib:39,convert:[7,39],convolv:39,copi:39,correct:39,correl:1,correspond:39,corrupt:[12,39],cover:[18,39],creat:[8,9,16,32,38],create_filter_bank:39,cross:39,current:[14,39],current_band:39,custom:39,cutoff:39,cvs_entry_tim:39,cvs_repositori:39,dai:39,dash:39,data:[0,1,2,7,12,13,14,22,27,31,32,33,34,35,36,37,38],data_band:39,data_dt:39,data_list:[34,39],data_ord:[32,39],datapath:39,dataqualitydict:39,dataqualityflag:39,dataset:39,date:[31,33,39],datestr:31,deal:39,deb:39,defin:[2,13,33,35],degre:[18,39],del:39,delta:39,delta_f:[8,18,39],delta_t:[18,20,39],deltaf:39,densiti:[0,9,14,23],dep:39,depend:39,deprec:20,describ:39,descript:39,detail:39,detect:39,determin:33,determine_output_seg:39,develop:39,deviat:39,devscript:39,dictionari:[22,32,34,39],directli:39,discard:39,discret:39,displai:39,distinguish:39,distribut:39,divid:16,document:39,dof:[18,20,39],dof_til:39,doing:39,domain:[1,7,39],don:39,done:39,download:39,dpi:39,draw:39,drop:39,dt_stride:12,dtype:39,durat:[10,13,21],dure:39,each:[8,13,32,34],easi:39,easili:39,edg:39,edgecolor:39,effect:12,effici:39,either:39,element:39,elif:39,els:39,end:[16,33,39],end_tim:[24,39],endswith:39,endtim:35,energi:[6,18,20,39],enough:39,ensembl:39,ensur:39,entir:39,epoch:39,eqn:39,equal:39,equat:[18,20],error:39,especi:39,essenti:39,estim:0,etc:39,etime_max_idx:39,etime_min_idx:39,event_id:39,event_list:[11,21,39],everi:[13,39],everywher:39,except:39,excess:13,excess_pow:39,excesspow:[15,39],exist:39,exit:39,expos:39,ext:15,extens:39,extent:39,extract:[31,33,35],f_0:39,facecolor:39,factor:39,fairli:39,fakesign:39,fals:[13,35,39],favor:20,fd_psd:[9,14,26,39],fdb:[7,39],fft:39,fft_wind
ow_len:1,fftconvolv:39,fftlength:39,fhigh_idx:39,field:[0,7,13,14,35,38],figur:39,file:[13,32,33,34,35,36,37],file_ord:39,file_to_seg:39,filenam:39,filter:[2,3,4,5,7,8,13,18],filter_band:39,filter_bank:[3,9,39],filter_len:18,filtered_seri:39,finest:[2,16,39],first:[14,35,39],fitler:18,fix:39,fixm:39,float64:39,flow:[8,39],flow_idx:39,fmax:[2,8,13],fmin:[2,8,13],fname:[24,29,39],focu:39,follow:39,foremost:39,format:0,fortun:39,found:[13,39],fourier:39,fout:39,fraction:[13,39],free:39,freedom:[18,39],frequenc:[1,2,4,7,8,9,13,14,16,18],frequencyseri:39,fribourg01:39,from:[0,2,7,12,13,16,31,33,36,37,39],from_cli:39,from_lal:39,front:39,fs_data:[9,39],full:[22,39],full_band:39,full_data:[34,39],full_seglist:[22,39],fullpath:39,fundament:39,further:39,fwhm:39,gaussian:[13,16,39],gca:39,gda:39,gener:[27,30,34],generate_timeseri:39,get:14,get_start:39,git_vers:39,give:39,given:[3,4,5,12,18,20],glob:[35,39],global:39,glue:[33,39],gnome:35,gnomedr:[35,39],goe:39,gps_end_tim:39,gps_start_tim:39,grant:39,gravit:39,great:39,greater:39,gwpy:39,h5py:[33,36,37,39],h_norm:39,haar:16,half:39,handi:39,handl:39,hann:1,happen:39,has:[1,39],have:[16,39],hdf5:[32,33,34,36,37,39],help:39,here:39,hertz:39,hfile:[33,36,37,39],hide:39,high:39,high_frequency_cutoff:39,higher:39,highest:[2,13,39],hour:39,how:39,howev:39,hrss:39,hstack:39,http:39,i386:39,identifi:[13,39],idx_max_off:39,ifo:[15,39],implement:39,improv:39,includ:39,increment:39,inde:39,independ:[16,39],index:[4,14,39],indic:16,individu:[16,39],inform:[0,39],initi:39,inner:[4,5,18,20,39],input:[4,5,12,16,39],inseg:[12,39],instanc:39,instrument:39,integ:39,intern:39,internet:39,interpret:39,interv:[33,39],introduc:[1,39],invers:39,investig:39,invidu:39,invok:23,isf:39,isn:39,issu:23,its:[4,39],jessi:39,join:39,jupyt:39,jupyterhub:39,keep:16,kei:39,know:39,known:[16,39],label:39,lal:39,lal_filt:[4,5,20],lal_psd:39,lalapp:39,lalburst:39,lalsimul:39,larg:[23,39],larger:39,last:[14,35,39],later:39,latest:39,lead:39,least:39,len:39,lengt
h:[0,13,33,39],let:39,level:39,level_tdiff:39,librari:[33,39],libtool:39,lift:39,ligo:39,ligo_lw:39,ligolw:39,ligotimegp:39,like:39,limit:39,linear:39,link:39,linux:39,list:[7,22,32,34],load:39,locat:39,log:39,logarithm:39,login:39,loglength:39,longer:39,longest:39,look:[16,39],loop:39,lose:39,low_frequency_cutoff:39,lower:39,lowest:[2,8,13,39],lsc:39,lscsoft:39,lsctabl:39,machin:39,made:39,magfield:39,magnet:[0,7,13,14,35,38],magneticfield:[11,34,39],magnitud:39,mai:39,mainz:39,make:[20,39],make_filenam:39,make_indp_til:39,manag:39,mani:[20,39],manual:39,map:[9,16,18],mask:39,match:39,matched_filter_cor:39,matchedfilt:39,math:39,matplotlib:39,max:39,max_dof:39,max_dur:[6,13,39],max_frequ:[2,39],maxim:39,maximum:39,mean:[1,16,39],measur:[0,39],measure_hrss:[20,39],median:39,member:39,messag:39,metadata:[2,31,33,36,37],method:[0,13,39],min:39,min_band:39,min_frequ:[2,39],minimum:39,minut:39,mkdir:39,mod:39,model:39,moder:39,modul:33,mono:39,month:39,more:[0,39],most:39,move:39,mu_sq:[6,16,17,39],mu_sq_dict:39,much:39,must:39,name:[32,33,34,35,39],nanosecond:39,narrow:39,nativ:18,nc_sum:[6,16,17,39],nchan:[3,8,9,13,39],ndof:39,necessari:39,need:[0,23,39],neg:39,next:39,nois:[13,39],nomin:39,non:39,none:[1,4,5,8,13,21,27,30,39],norm:30,normal:[16,30],normalis:39,note:[0,16,20,39],notebook:39,now:39,number:[2,13,18,20,23,39],numpi:39,nyquist:39,object:[33,36,37],offset:39,onc:39,one:39,ones:39,onli:[1,39],open:[33,39],oper:39,optim:[16,39],option:[12,39],order:39,org:39,origin:[16,39],other:39,our:[16,39],ourselv:16,out:39,outli:39,output:16,outseg:39,outsid:39,over:39,overbar:30,overlap:[16,39],own:39,packag:[33,39],pad:39,page:39,pankow:0,paramet:[0,1,2,7,8,9,13,14,18,22,31,32,33,34,35,36,37,38,39],parenthesi:39,part:39,particl:39,particular:39,particularli:20,pass:39,path1:39,path2:39,path3:39,path:[34,39],pattern:39,pdb:39,peak_tim:39,peak_time_n:39,peopl:39,per:39,percent:39,perform:[13,39],period:[22,35,39],periodogram:39,pip:39,plan:16,plane:[16,39],plot:13,plot_
act:39,plot_asd:39,plot_spectrogram:39,plot_spectrum:39,plot_t:39,plot_time_seri:39,plot_trigg:39,plot_whiten:39,plotarg:39,plotter:39,png:[24,29,39],point:[1,14,23,39],port:39,posit:39,possibl:39,power:[0,9,13,14],pre:39,precis:23,preload:33,presenc:39,present:39,presev:39,pretti:39,print:39,probabl:[13,39],problem:39,proc_row:39,proce:39,procedur:39,process:[12,39],process_id:39,produc:13,product:[4,5,18,20,39],properli:39,provid:39,psd:[0,4,5,8,20,21],psd_estim:[0,13,39],psd_segment_length:[0,6,11,13,39],psd_segment_strid:[0,13,39],pure_ton:39,pustelni:39,put:39,pycbc:39,pypi:39,pyplot:39,python:39,quantiti:39,quickli:39,radiat:39,random:39,random_tim:39,rang:[13,33],rate:[0,2,7,12,39],ratio:39,read:39,recomput:20,record:39,redefin:39,reduc:39,refer:[18,20,39],region:39,register_to_xmldoc:39,rel:39,relat:39,remov:39,renorm:[3,39],rep:[35,39],repetit:39,repositori:39,repres:[33,39],represent:39,request:39,requir:[20,35,39],rescal:39,reset:39,resolut:[16,18,39],respect:39,rest:39,result:16,retriev:2,retrieve_channel_data:39,reveal:39,root:39,round:39,row:39,run:39,sai:39,sam:39,same:[4,39],sampl:[0,2,7,12,18,39],sample_frequ:39,sample_r:[0,2,7,12,24,39],samplingr:39,san:39,saniti:39,satisfi:39,save:[16,39],savefig:39,scalar:39,scale:39,scan:39,scientif:39,scipi:39,script:39,search:13,second:[0,13,39],section:39,see:[18,20,39],seg:[15,39],seg_len:[9,39],seg_strid:39,segfil:39,segfold:39,seglist:[27,30,39],segment:[0,12,13,32,33,34],segmentlist:39,segmentplot:39,segnam:33,select:[13,22,39],semi:[33,39],send:39,sens:39,separ:[0,13,39],seri:[0,13,14,27,34,37,38],serverdata:[35,39],set:[2,4,5,12],set_titl:39,set_trac:39,set_xlim:39,set_xscal:39,set_ylim:39,set_yscal:39,setframer:39,setnam:[11,34,36,37,39],setnchannel:39,setsampwidth:39,setup:39,sever:39,shape:39,should:39,show:39,side:39,signal:[18,20],simburstgaussian:39,simburstsinegaussian:39,similar:39,simpli:39,sin:39,sinc:[20,39],sine:39,singl:39,slow:20,smaller:39,smallest:39,snglbursttabl:39,snr:39,some:39,someh
ow:[23,39],soon:39,sort:39,sound:38,sourc:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39],space:39,span:39,spec:[24,39],spec_corr:[3,4,5,8,20,39],specgram:24,specif:[33,36,39],specifi:39,spectral:[0,1,23],spectrogram:30,spectrogramplot:39,spectrum:[9,14,39],spectrumplot:39,split:39,sqrt:[30,39],squar:[18,20,39],src:39,ssh:39,stabl:39,standard:39,start:[23,33,39],start_freq:21,start_tim:[21,24,39],start_time_n:39,starttim:35,stat:39,state:39,statement:39,station:[11,13,22,23,27,30,32,35,39],std:39,stderr:39,step:39,still:39,stochast:39,stop_tim:39,stop_time_n:39,store:[16,34,39],str:[31,33,35],strain:39,strain_high_pass:39,strang:39,strategi:39,stretch:12,stride:12,string:[0,13,32,36,37],structur:39,studi:39,sub:39,sub_psd:19,subsequ:39,sudo:39,suffic:39,sum:[16,18,20,39],sum_filt:39,surviv:39,swig:39,symbol:39,synchron:39,sys:39,system:39,t0s:16,t0str:31,t1200125:[18,20],t_idx_max:[14,39],t_idx_max_off:39,t_idx_min:[14,39],t_idx_off:39,tab:39,tabl:39,tag:15,tarbal:39,task:39,tdiff:39,technic:39,templat:39,temporari:39,termin:39,test:39,tf_map:[6,16,17,39],tfmap:21,than:39,thei:39,themselv:[5,39],theori:39,therefor:39,thi:[13,16,20,39],thing:[16,39],think:16,those:39,threshold:[13,21,39],through:39,throughout:39,tile:[6,10,13,16,18,20],tile_bandwidth:[2,39],tile_energi:19,tile_fap:[13,39],time:[0,7,9,13,14,16,18,22,27,31,33,34,37,38],time_seri:39,timeseri:[0,13,14,38,39],timeseriesplot:39,timestamp:[31,35,39],titl:39,tlen:39,tmp_filter_bank:39,tmp_ts_data:39,to_frequencyseri:39,todo:39,togeth:39,total:[2,13,39],tprint:39,track:16,transform:[1,16,39],translat:39,tri:39,trigger:13,trigger_list_from_map:39,ts_data:[0,6,11,13,14,39],ts_list:[23,27,30,39],tsum:39,tukei:[1,39],tune:39,twice:[18,39],two:[1,39],txt:39,type:39,under:39,undersampl:39,uni:39,uniformli:39,uninstal:39,unit:[16,39],unix:39,unpack:39,unwhite_filter_ip:39,unwhite_ss_ip:39,unwhiten:[4,5,18,20,39],upon:39,us_rat:39,use:[2,39],used:[33,39]
,useful:39,user:[13,35],usernam:39,using:[33,34,39],usual:39,utc:[31,33,39],uw_ss_ii:18,uw_ss_ij:18,valid:[1,39],valu:39,valueerror:39,variabl:[16,39],varianc:[16,39],variou:39,vector:39,verbos:39,veri:39,version:39,vincent:39,viridi:39,vmax:39,vmin:39,w_ss_ij:18,wai:[16,39],want:39,warn:39,wav:39,wave:39,waveform:39,wavelet:16,wax:39,webpag:39,websit:39,welch:39,well:[13,39],what:39,when:39,where:[16,39],which:[18,39],white_filter_ip:39,white_plot:39,white_ss_ip:39,whiten:18,whitened_spectrogram:39,who:39,whole:27,why:39,wide:39,wider:16,width:39,window:[1,12,13,14],window_fr:12,window_fract:[1,6,11,12,13,39],window_offset_epoch:39,window_sigma_sq:39,wing:39,wish:39,withen:[13,39],within:[13,39],withing:35,without:12,won:39,word:39,work:16,would:39,wout:39,write:39,write_filenam:39,writefram:39,written:39,wspec:39,wtype:1,xlabel:39,xlim:39,xml:[13,15],xmldoc:39,xrang:39,xunit:39,year:39,ylabel:39,ylim:39,you:39,your:39,z_j_b:[18,20,39],zero:[16,39],zpad:39},titles:["gdas.epower.calculate_psd","gdas.epower.calculate_spectral_correlation","gdas.epower.check_filtering_settings","gdas.epower.compute_channel_renormalization","gdas.epower.compute_filter_ips_adjacent","gdas.epower.compute_filter_ips_self","gdas.epower.construct_tiles","gdas.epower.convert_to_time_domain","gdas.epower.create_filter_bank","gdas.epower.create_tf_plane","gdas.epower.create_tile_duration","gdas.epower.create_xml","gdas.epower.determine_output_segment","gdas.epower.excess_power","gdas.epower.identify_block","gdas.epower.make_filename","gdas.epower.make_indp_tiles","gdas.epower.make_tiles","gdas.epower.measure_hrss","gdas.epower.measure_hrss_poorly","gdas.epower.measure_hrss_slowly","gdas.epower.trigger_list_from_map","gdas.plots.plot_activity","gdas.plots.plot_asd","gdas.plots.plot_spectrogram","gdas.plots.plot_spectrogram_from_ts","gdas.plots.plot_spectrum","gdas.plots.plot_time_series","gdas.plots.plot_triggers","gdas.plots.plot_ts","gdas.plots.plot_whitening","gdas.retrieve.construct_utc_fro
m_metadata","gdas.retrieve.create_activity_list","gdas.retrieve.file_to_segment","gdas.retrieve.generate_timeseries","gdas.retrieve.magfield","gdas.retrieve.retrieve_channel_data","gdas.retrieve.retrieve_data_timeseries","gdas.utils.create_sound","GNOME Data Analysis Software"],titleterms:{"new":39,"switch":39,GPS:39,access:39,activ:39,algorithm:39,amplitud:39,analys:39,analysi:39,apt:39,artifici:39,asd:39,bandwidth:39,bank:39,base:39,block:39,boundari:39,calculate_psd:0,calculate_spectral_correl:1,channel:39,check:39,check_filtering_set:2,comput:39,compute_channel_renorm:3,compute_filter_ips_adjac:4,compute_filter_ips_self:5,concaten:39,construct:39,construct_til:6,construct_utc_from_metadata:31,convert_to_time_domain:7,correl:39,creat:39,create_activity_list:32,create_filter_bank:8,create_sound:38,create_tf_plan:9,create_tile_dur:10,create_xml:11,data:39,debian:39,defin:39,densiti:39,determin:39,determine_output_seg:12,differ:39,durat:39,each:39,epow:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],establish:39,estim:39,event:39,exampl:39,excess:39,excess_pow:13,explor:39,extract:39,fake:39,field:39,file:39,file_to_seg:33,filter:39,find:39,frequenc:39,gda:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38],gener:39,generate_timeseri:34,get:39,given:39,gnome:39,identify_block:14,initialis:39,inject:39,instal:39,introduct:39,invok:39,issu:39,lalsuit:39,list:39,mac:39,macport:39,magfield:35,magnet:39,main:39,make_filenam:15,make_indp_til:16,make_til:17,map:39,measure_hrss:18,measure_hrss_poorli:19,measure_hrss_slowli:20,metadata:39,modul:39,multi:39,multipl:39,normal:39,output:39,overview:39,plot:[22,23,24,25,26,27,28,29,30,39],plot_act:22,plot_asd:23,plot_spectrogram:24,plot_spectrogram_from_t:25,plot_spectrum:26,plot_t:29,plot_time_seri:27,plot_trigg:28,plot_whiten:30,power:39,precis:39,prepar:39,produc:39,program:39,psd:39,rang:39,real:39,result:39,retriev:[31,32,33,34,35,36,37,39],retrieve_channel_
data:36,retrieve_data_timeseri:37,routin:39,search:39,segment:39,seri:39,server:39,set:39,signal:39,simul:39,softwar:39,sound:39,spectral:39,spectrogram:39,stride:39,tile:39,time:39,tool:39,trigger:39,trigger_list_from_map:21,user:39,util:[38,39],virtual:39,whiten:39,window:39,work:39}}) \ No newline at end of file +Search.setIndex({docnames:["backup","epower_overview","epower_step1_checkfilt","epower_step2_psd","epower_step3_speccor","epower_step4_filterbank","epower_step5_normalization","epower_step6_initialisation","example","excess_power","generated/gdas.epower.calculate_psd","generated/gdas.epower.calculate_spectral_correlation","generated/gdas.epower.check_filtering_settings","generated/gdas.epower.compute_channel_renormalization","generated/gdas.epower.compute_filter_ips_adjacent","generated/gdas.epower.compute_filter_ips_self","generated/gdas.epower.construct_tiles","generated/gdas.epower.convert_to_time_domain","generated/gdas.epower.create_filter_bank","generated/gdas.epower.create_tf_plane","generated/gdas.epower.create_tile_duration","generated/gdas.epower.create_xml","generated/gdas.epower.determine_output_segment","generated/gdas.epower.excess_power","generated/gdas.epower.identify_block","generated/gdas.epower.make_filename","generated/gdas.epower.make_indp_tiles","generated/gdas.epower.make_tiles","generated/gdas.epower.measure_hrss","generated/gdas.epower.measure_hrss_poorly","generated/gdas.epower.measure_hrss_slowly","generated/gdas.epower.trigger_list_from_map","generated/gdas.plots.plot_activity","generated/gdas.plots.plot_asd","generated/gdas.plots.plot_spectrogram","generated/gdas.plots.plot_spectrogram_from_ts","generated/gdas.plots.plot_spectrum","generated/gdas.plots.plot_time_series","generated/gdas.plots.plot_triggers","generated/gdas.plots.plot_ts","generated/gdas.plots.plot_whitening","generated/gdas.retrieve.construct_utc_from_metadata","generated/gdas.retrieve.create_activity_list","generated/gdas.retrieve.file_to_segment","generated/
gdas.retrieve.generate_timeseries","generated/gdas.retrieve.magfield","generated/gdas.retrieve.retrieve_channel_data","generated/gdas.retrieve.retrieve_data_timeseries","generated/gdas.utils.create_sound","index","installation","server"],envversion:50,filenames:["backup.rst","epower_overview.rst","epower_step1_checkfilt.rst","epower_step2_psd.rst","epower_step3_speccor.rst","epower_step4_filterbank.rst","epower_step5_normalization.rst","epower_step6_initialisation.rst","example.rst","excess_power.rst","generated/gdas.epower.calculate_psd.rst","generated/gdas.epower.calculate_spectral_correlation.rst","generated/gdas.epower.check_filtering_settings.rst","generated/gdas.epower.compute_channel_renormalization.rst","generated/gdas.epower.compute_filter_ips_adjacent.rst","generated/gdas.epower.compute_filter_ips_self.rst","generated/gdas.epower.construct_tiles.rst","generated/gdas.epower.convert_to_time_domain.rst","generated/gdas.epower.create_filter_bank.rst","generated/gdas.epower.create_tf_plane.rst","generated/gdas.epower.create_tile_duration.rst","generated/gdas.epower.create_xml.rst","generated/gdas.epower.determine_output_segment.rst","generated/gdas.epower.excess_power.rst","generated/gdas.epower.identify_block.rst","generated/gdas.epower.make_filename.rst","generated/gdas.epower.make_indp_tiles.rst","generated/gdas.epower.make_tiles.rst","generated/gdas.epower.measure_hrss.rst","generated/gdas.epower.measure_hrss_poorly.rst","generated/gdas.epower.measure_hrss_slowly.rst","generated/gdas.epower.trigger_list_from_map.rst","generated/gdas.plots.plot_activity.rst","generated/gdas.plots.plot_asd.rst","generated/gdas.plots.plot_spectrogram.rst","generated/gdas.plots.plot_spectrogram_from_ts.rst","generated/gdas.plots.plot_spectrum.rst","generated/gdas.plots.plot_time_series.rst","generated/gdas.plots.plot_triggers.rst","generated/gdas.plots.plot_ts.rst","generated/gdas.plots.plot_whitening.rst","generated/gdas.retrieve.construct_utc_from_metadata.rst","generated/gda
s.retrieve.create_activity_list.rst","generated/gdas.retrieve.file_to_segment.rst","generated/gdas.retrieve.generate_timeseries.rst","generated/gdas.retrieve.magfield.rst","generated/gdas.retrieve.retrieve_channel_data.rst","generated/gdas.retrieve.retrieve_data_timeseries.rst","generated/gdas.utils.create_sound.rst","index.rst","installation.rst","server.rst"],objects:{"gdas.epower":{calculate_psd:[10,0,1,""],calculate_spectral_correlation:[11,0,1,""],check_filtering_settings:[12,0,1,""],compute_channel_renormalization:[13,0,1,""],compute_filter_ips_adjacent:[14,0,1,""],compute_filter_ips_self:[15,0,1,""],construct_tiles:[16,0,1,""],convert_to_time_domain:[17,0,1,""],create_filter_bank:[18,0,1,""],create_tf_plane:[19,0,1,""],create_tile_duration:[20,0,1,""],create_xml:[21,0,1,""],determine_output_segment:[22,0,1,""],excess_power:[23,0,1,""],identify_block:[24,0,1,""],make_filename:[25,0,1,""],make_indp_tiles:[26,0,1,""],make_tiles:[27,0,1,""],measure_hrss:[28,0,1,""],measure_hrss_poorly:[29,0,1,""],measure_hrss_slowly:[30,0,1,""],trigger_list_from_map:[31,0,1,""]},"gdas.plots":{plot_activity:[32,0,1,""],plot_asd:[33,0,1,""],plot_spectrogram:[34,0,1,""],plot_spectrogram_from_ts:[35,0,1,""],plot_spectrum:[36,0,1,""],plot_time_series:[37,0,1,""],plot_triggers:[38,0,1,""],plot_ts:[39,0,1,""],plot_whitening:[40,0,1,""]},"gdas.retrieve":{construct_utc_from_metadata:[41,0,1,""],create_activity_list:[42,0,1,""],file_to_segment:[43,0,1,""],generate_timeseries:[44,0,1,""],magfield:[45,0,1,""],retrieve_channel_data:[46,0,1,""],retrieve_data_timeseries:[47,0,1,""]},"gdas.utils":{create_sound:[48,0,1,""]}},objnames:{"0":["py","function","Python 
function"]},objtypes:{"0":"py:function"},terms:{"case":[0,1,2,9,50],"class":0,"default":[0,7,9,22,50],"final":[0,1,5,7,9,43,50],"float":[0,3,9,10,12,17,18,23,24],"function":[0,3,4,5,6,7,9,30,43,49,50],"import":[0,8,9],"int":[0,2,3,6,7,9,12,23,45],"new":[7,51],"return":[0,3,4,5,8,9,14,15,22,44],"true":[0,9],"try":[0,9],"var":[0,9],"while":[0,9,50,51],And:33,For:[0,1,8,9,50],GPS:[41,45],One:[0,3,9],Such:[0,7,9],That:[0,8],The:[0,1,2,3,4,5,6,7,8,9,11,14,26,43,50,51],These:[0,5,9],__dict__:[0,9],__program__:[0,9],abl:[0,2,9],abov:[0,1,9],abs:[0,9],absenc:[0,1,3,9],acced:[0,51],access:51,accordingli:[0,9],account:[0,9],activ:[8,32,45],actual:[0,4,5,9,50],add:[0,6,9,50],add_colorbar:[0,9],add_state_seg:0,added:[0,7,9,50],addit:[0,6,9,26,50],adjac:[0,5,9,14,28],adjust:[0,9],afach:[0,51],after:[0,1,7,9,51],against:[0,9],agg:[0,33],alarm:[0,1,9,23],algorithm:1,all:[0,1,2,7,8,9,23,26,42,45,49,50],allan:[0,49],allow:[0,50,51],almost:[0,9],along:[0,8,9,50],alp:[0,1,9],alreadi:[0,50],also:[0,3,7,8,9,50,51],alwai:[0,8,9],amd64:[0,50],amelior:[0,33],amount:22,amplitud:[7,9,33],analys:[1,7,23,45],analysi:[1,5,6,8,9,23,51],analysis_end_tim:[0,7,9],analysis_start_tim:[0,7,9],analyz:[0,8,9],anderson:[0,1,9],ani:[0,5,9,49],append_search_summari:[0,9],appendchild:[0,9],appli:[0,4,9,26],applic:[0,49],approxim:[28,30],arch:[0,50],architectur:[0,50],area:28,arg:[0,2,3,4,7,9],argument:[0,2,3,9,14,15],around:[0,6,9],arrai:[0,3,5,7,8,9,14,19,44],artifici:49,ascii:0,assert:[0,2,9],associ:[0,9],assum:[14,22],assuredli:[0,9],astro:0,astropi:[0,50],astyp:[0,3,9],attenu:[0,2,9],attr:0,attribut:[43,46,47],audio:0,autoconf:[0,50],automak:[0,50],automat:[0,50],avail:[0,32,42,44,49,51],averag:[0,1,3,9,10,23],avg_method:[0,3,9],axi:0,axion:[0,1,9],axvlin:[0,9],band:[0,1,2,5,9,16,18,19,23,31],bandpass:[0,2,9],bandwidth:[1,2,3,5,7,12,13,18,23,49],bank:[1,2,12,17,18,23,49],base:[9,13,48],basic:[0,1,9],becom:[0,51],been:[0,1,7,9,50,51],befor:[0,3,9,11],beforehand:[0,50],begin:[0,9],being:[0,1,9],below:[0,1
,2,9],best:[0,50],better:[0,1,9],between:[0,1,3,5,7,9,10,14,23],big:[0,4,9],bigger:[0,50],bin:[0,3,9,26,28],bit:[0,50],block:[1,22,24,28],blue:0,both:[0,3,4,7,9],bradi:[0,1,9],branch:[0,9],browser:[0,51],budker:[0,51],build:[0,50],bunch:23,burst:[0,1,7,9],c0aonvert:[0,1,9],calcul:[0,1,3,6,7,9,11,16,43],calculate_spectral_correl:[0,4,9],call:[0,50],can:[0,1,2,3,5,7,8,9,22,26,49,50,51],caus:[0,2,9],caveat:26,ceil:0,central:[0,3,5,9],central_freq:[0,7,9],chang:[0,1,9],channel:[1,2,5,7,8,12,23,46,47,49],channel_nam:[0,9],check:[7,12,49],chi2:[0,9],childnod:[0,9],chisq_dof:[0,7,9],choic:11,chose:0,chri:10,chunk:[0,8],click:[0,51],clip:[0,9],clip_sampl:[0,9],close:0,cmap:[0,9],coalesc:0,code:[0,9],collabor:[0,1,7,9,49,51],color:[0,9],column:[0,7,9],comb:[0,1,9],command:[0,50],commensur:[0,9],commonli:[0,7,9],compar:[0,1,9],compil:[0,50],complain:[0,33],complet:0,complex128:[0,9],complex16frequencyseri:[0,5,9],complex:[0,9,26,33],complic:[0,51],comput:[1,3,8,13,14,15,49],compute_channel_renom:[0,6,9],compute_filter_ips_adjac:[0,5,9],compute_filter_ips_self:[0,5,9],condit:0,confid:[0,7,9],consecut:[0,3,9,10,23,42],consid:[0,1,7,9],consist:0,construct:[1,7,16,28,41],construct_arg:[0,2,9],construct_utc_from_metadata:43,contact:[10,49],contain:[0,8,9,49,50],continu:[0,9,32,49],contrib:[0,50],convert:[0,3,9,17],convolv:[0,9],copi:[0,9],correct:[0,9,49],correl:[1,3,11,49],correspond:[0,3,5,9],corrupt:[0,4,9,22],cover:[0,9,28],creat:[1,3,5,7,18,19,26,42,48,49,51],create_filter_bank:[0,5,9],cross:[0,49],current:[0,9,24],current_band:[0,9],custom:[0,8],cutoff:[0,2,9],cvs_entry_tim:[0,9],cvs_repositori:[0,9],dai:[0,8],dash:[0,8],data:[1,2,3,4,5,6,7,8,10,11,12,17,22,23,24,32,37,41,42,43,44,45,46,47,48,49,51],data_band:[0,2,9],data_dt:0,data_list:[0,44],data_ord:[0,42],datapath:[0,8],dataqualitydict:0,dataqualityflag:0,dataset:[0,9,51],date:[0,8,9,41,43],datestr:41,deal:[0,4,9],deb:[0,50],defin:[1,2,3,5,7,8,12,23,43,45],degre:[0,1,9,28],del:[0,9],delta_f:[0,3,5,9,18,28],delta_t:[0,9,2
8,30],deltaf:[0,5,6,9],densiti:[1,10,19,24,33,49],dep:[0,50],depend:[0,50],deprec:30,describ:[0,1,9],descript:[0,1,9],detail:[0,49,50],detect:[0,1,3,9,49],determin:[1,2,3,4,43],determine_output_seg:[0,9],develop:[0,1,9,50],deviat:[0,49],devscript:[0,50],dictionari:[0,6,9,32,42,44],differ:[1,6,7,8,49],directli:[0,51],discard:[0,4,9],discret:[0,3,9],displai:[0,3,5,9],distinguish:[0,9],distribut:0,divid:26,document:[0,1,4,9,49],dof:[0,9,28,30],dof_til:[0,9],doing:[0,8],domain:[0,1,5,7,9,11,17],don:[0,9],done:[0,1,4,8,9,51],download:[0,50],dpi:0,draw:0,drop:[0,9],dt_stride:22,dtype:[0,9],durat:[1,7,20,23,31,49],dure:[0,1,6,9],each:[1,3,5,6,18,23,42,44,51],easi:[0,50],easili:[0,2,8,9,50],edg:0,edgecolor:0,effect:22,effici:[0,1,9],either:[0,2,4,8,9,51],element:0,elif:[0,2,9],els:[0,2,7,9],end:[0,1,4,7,8,9,26,43],end_tim:[0,8,9,34],endswith:[0,9],endtim:45,energi:[0,1,9,16,28,30],enough:[0,9],ensembl:0,ensur:[0,3,9],entir:[0,1,9,51],epoch:[0,9],eqn:[0,4,9],equal:[0,1,2,4,5,9],equat:[28,30],error:[0,2,9],especi:[0,9],essenti:[0,50],estim:[1,7,10,49],etc:[0,50],etime_max_idx:[0,9],etime_min_idx:[0,9],event_id:[0,7,9],event_list:[0,7,9,21,31],everi:[0,9,23],everywher:[0,9],exampl:49,except:[0,9],excess:[8,23],excess_pow:[0,8],excesspow:[0,6,9,25],exist:[0,1,3,9],exit:[0,2,9],explor:[1,2,6],expos:0,ext:25,extens:[0,1,9],extent:[0,1,5,9],extract:[41,43,45],f_0:0,facecolor:0,factor:[0,9],fairli:[0,50],fake:49,fakesign:0,fals:[0,1,9,23,45],favor:30,fd_psd:[0,3,5,9,19,24,36],fdb:[0,5,9,17],fft:0,fft_window_len:11,fftconvolv:[0,9],fftlength:0,fhigh_idx:[0,9],field:[8,10,17,23,24,45,48,49],figur:[0,1,9],file:[23,42,43,44,45,46,47,51],file_ord:0,file_to_seg:0,filenam:0,filter:[1,3,12,13,14,15,17,18,23,28,49],filter_band:0,filter_bank:[0,5,6,9,13,19],filter_len:28,filtered_seri:[0,9],finest:[0,2,9,12,26],first:[0,1,2,3,5,7,8,9,24,45,50],fitler:28,fix:[0,9,50],fixm:[0,9],float64:[0,3,9],flow:[0,2,5,9,18],flow_idx:[0,9],fmax:[12,18,23],fmin:[12,18,23],fname:[0,9,34,39],focu:[0,49],follo
w:[0,1,2,3,5,8,9,50],foremost:[0,9],format:10,fortun:[0,51],found:[0,1,9,23,50],fourier:[0,1,3,5,9],fout:0,fraction:[0,4,9,23],free:[0,51],freedom:[0,1,9,28],frequenc:[1,2,3,4,5,6,11,12,14,17,18,19,23,24,26,28,49],frequencyseri:[0,9],fribourg01:[0,8],from:[0,1,2,3,4,5,7,9,10,12,17,22,23,26,41,43,46,47,50,51],from_cli:[0,3,9],from_lal:0,front:[0,50],fs_data:[0,9,19],full:[0,32,50],full_band:[0,9],full_data:[0,44],full_seglist:[0,32],fullpath:0,fundament:[0,9],further:[0,5,9],fwhm:[0,5,9],gaussian:[0,1,9,23,26],gca:0,gda:[0,8,50],gener:[37,40,44,50],generate_timeseri:0,get:[2,9,24],get_start:[0,9],git_vers:[0,9],give:[0,1,4,9],given:[1,13,14,15,22,28,30],glob:[0,45],global:[0,9],glue:[0,7,9,43,50],gnome:[1,8,9,45,49,50,51],gnomedr:[0,8,45],goe:[0,9],gps_end_tim:[0,9],gps_start_tim:[0,9],grant:[0,51],gravit:[0,1,9],great:[0,1,9],greater:[0,2,9],gwpy:[0,50],h5py:[0,43,46,47],h_norm:[0,9],haar:26,half:[0,2,3,9],handi:[0,51],handl:[0,49],hann:11,happen:[0,1,9],has:[0,1,9,11,51],have:[0,4,7,8,9,26,49,50,51],hdf5:[0,9,42,43,44,46,47],help:[0,1,3,9],here:[0,2,9,49,50],hertz:[0,3,9],hesit:49,hfile:[0,43,46,47],hide:0,high:[0,1,2,9],high_frequency_cutoff:[0,9],higher:[0,2,9],highest:[0,3,9,12,23],hour:[0,8],how:[0,1,4,9,50],howev:[0,50],hrss:[0,5,9],hstack:0,http:[0,50],i386:[0,50],identifi:[0,1,2,9,23],idx_max_off:[0,9],ifo:[0,7,9,25],implement:[0,1,9],improv:[0,1,9],includ:[0,9,50],increment:[0,5,9],inde:[0,2,9],independ:[0,9,26],index:[0,7,9,14,24],indic:26,individu:[0,1,3,9,26],inform:[0,7,9,10,50],initi:[0,9],initialis:6,inject:49,inner:[0,5,9,14,15,28,30],input:[0,5,9,14,15,22,26],inseg:[0,9,22],instal:49,instanc:[0,8],instrument:[0,1,3,9],integ:[0,4,9],intern:[0,6,9],internet:[0,51],interpret:[0,1,9],interv:[0,9,43],introduc:[0,1,9,11],invers:[0,1,3,9],investig:[0,1,9],invidu:[0,3,9],invok:33,isf:[0,9],isn:[0,9],issu:33,its:[0,1,2,4,9,14],jessi:[0,50],join:0,jupyt:[0,8],jupyterhub:[0,51],keep:26,kei:0,know:[0,7,9],known:[0,1,9,26],label:0,lal:[0,3,9,50],lal_filt:[14,15,
30],lal_psd:[0,3,5,9],lalapp:[0,50],lalburst:[0,50],lalsimul:[0,50],lalsuit:49,larg:[0,1,9,33],larger:[0,2,4,9],last:[0,5,9,24,45],later:0,latest:[0,50],lead:[0,1,9],least:[0,8],len:[0,5,6,7,9],length:[0,1,3,5,7,9,10,23,43],let:[0,2,9],level:[0,9],level_tdiff:[0,9],librari:[0,3,9,43],libtool:[0,50],lift:[0,3,9],ligo:[0,1,7,9,50],ligo_lw:[0,9],ligolw:[0,7,9],ligotimegp:[0,9],like:[0,1,2,9,51],limit:[0,1,9],linear:[0,9],link:[0,50],linux:[0,50],list:[1,5,17,32,42,44,50],load:[0,7,9],locat:[0,8,50],log:[0,6,9],logarithm:[0,9],login:[0,51],loglength:0,longer:[0,1,9],longest:[0,1,9],look:[0,6,9,26],loop:[0,9],lose:[0,4,9],low_frequency_cutoff:[0,9],lower:[0,2,9],lowest:[0,2,9,12,18,23],lsc:[0,7,9],lscsoft:[0,50],lsctabl:[0,7,9],machin:[0,50,51],made:[0,1,7,9,49],magfield:[0,8],magnet:[8,10,17,23,24,45,48,49],magneticfield:[0,21,44],magnitud:[0,3,9],mai:[0,50],main:49,mainz:[0,51],make:[0,2,9,30],make_filenam:[0,9],make_indp_til:[0,9],manag:[0,50],mani:[0,9,30],manual:[0,9],map:[1,19,26,28,49],mask:0,match:[0,9],matched_filter_cor:[0,9],matchedfilt:[0,9],math:[0,6,9],matplotlib:[0,50],max:[0,9],max_dof:[0,9],max_dur:[0,9,16,23],max_frequ:[0,2,9,12],maxim:[0,9],maximum:[0,2,7,9],mean:[0,4,5,8,9,11,26,51],measur:[0,1,3,9,10],measure_hrss:[0,9,30],median:[0,8],member:[0,51],messag:[0,2,9],metadata:[9,12,41,43,46,47],method:[0,1,3,8,9,10,23,50],min:[0,2,9],min_band:[0,6,9],min_frequ:[0,2,9,12],minimum:[0,2,7,9],minut:[0,8],mkdir:[0,9],mod:[0,9],model:[0,9],moder:[0,1,9],modul:[3,7,8,43],mono:0,month:[0,8],more:[0,1,9,10,51],most:[0,50],move:[0,9],mu_sq:[0,9,16,26,27],mu_sq_dict:[0,6,9],much:[0,1,4,9],multi:[9,49],multipl:1,must:[0,2,9,50],name:[0,8,42,43,44,45],nanosecond:[0,9],narrow:[0,9],nativ:28,nc_sum:[0,6,9,16,26,27],nchan:[0,2,5,6,8,9,13,18,19,23],ndof:[0,9],necessari:[0,5,9],need:[0,3,5,7,9,10,33,50],neg:[0,9],next:[0,9],nois:[0,1,3,9,23],nomin:[0,4,9],non:[0,9],none:[0,2,5,7,9,11,14,15,18,23,31,37,40,45],norm:40,normal:[5,26,40,49],normalis:[0,9],note:[0,10,26,30,50]
,notebook:[0,8],now:[0,1,9],number:[0,1,2,5,9,12,23,28,30,33],numpi:[0,3,4,9,50],nyquist:[0,1,2,3,9],object:[43,46,47],offset:[0,4,7,9],onc:[0,9,50],one:[0,1,2,4,7,9],ones:[0,9],onli:[0,1,2,9,11],open:[0,9,43],oper:[0,51],optim:[0,1,9,26],option:[0,2,3,9,22],order:[0,3,8,9],org:[0,50],origin:[0,4,9,26],other:[0,9],our:[0,26,51],ourselv:26,out:[0,9],outli:[0,1,9],output:[2,26],outseg:[0,9],outsid:0,over:[0,9],overbar:40,overlap:[0,1,3,9,26],overview:49,own:[0,8,51],packag:[0,8,9,43,49,50],pad:0,page:[0,50],pankow:10,paramet:[0,3,7,9,10,11,12,17,18,19,23,24,28,32,41,42,43,44,45,46,47,48],parenthesi:0,part:[0,4,9,51],particl:[0,1,9],particular:[0,8],particularli:30,pass:[0,2,5,9],path1:0,path2:0,path3:0,path:[0,8,44],pattern:[0,1,3,9],pdb:[0,9],peak_tim:[0,7,9],peak_time_n:[0,7,9],peopl:[0,51],per:0,percent:[0,9],perform:[0,3,9,23,49],period:[0,3,9,32,45],periodogram:[0,1,3,9],pip:[0,50],plan:26,plane:[0,1,2,9,26],plot:[1,3,8,23,49],plot_act:[0,8],plot_asd:[0,8],plot_spectrogram:[0,9],plot_spectrum:[0,3,9],plot_t:[0,9],plot_time_seri:[0,8],plot_trigg:[0,8],plot_whiten:[0,8],plotarg:0,plotter:0,png:[0,9,34,39],point:[0,7,9,11,24,33,49],port:[0,50],posit:[0,9],possibl:[0,2,6,9,51],power:[8,10,19,23,24],pre:[0,4,7,9],precis:[9,33],preload:43,presenc:[0,9],present:[0,1,8,9],presev:[0,4,9],pretti:[0,50],print:[0,2,3,5,6,9],probabl:[0,1,4,9,23],problem:[0,50],proc_row:[0,9],proce:[0,3,9],procedur:[0,1,9],process:[0,1,3,9,22,49],process_id:[0,7,9],produc:[3,8,9,23],product:[0,5,9,14,15,28,30],program:[2,9,49],properli:0,provid:[0,5,9],psd:[1,5,6,10,14,15,18,30,31,49],psd_estim:[0,3,8,9,10,23],psd_segment_length:[0,3,4,8,9,10,16,21,23],psd_segment_strid:[0,3,8,9,10,23],pure_ton:0,pustelni:[0,1,9,49],put:[0,50],pycbc:[0,3,9,50],pypi:[0,50],pyplot:[0,9],python:[0,8,50,51],quantiti:[0,9],question:49,quickli:[0,9],radiat:[0,1,9],random:0,random_tim:0,rang:[3,6,23,43],rate:[0,2,3,9,10,12,17,22],ratio:[0,9],read:[0,9],recomput:30,record:[0,7,9],redefin:[0,9],reduc:[0,1,3,9],refer:[0
,9,28,30],region:[0,9],register_to_xmldoc:[0,9],rel:[0,9],relat:[0,50],remov:[0,9],renorm:[0,6,9,13],rep:[0,8,45],repetit:[0,1,3,9],repositori:[0,8,9],repres:[0,9,43],represent:[0,1,9],request:[0,7,9,51],requir:[0,1,9,30,45,50],resampl:45,rescal:0,reset:[0,9],resolut:[0,9,26,28],respect:[0,1,3,7,9],rest:[0,50],result:[1,3,26],retriev:[8,12],retrieve_channel_data:0,reveal:[0,1,3,9],root:[0,4,9],round:[0,9],routin:50,row:[0,9],run:[0,9,50],sai:[0,9],sam:[0,51],same:[0,14],sampl:[0,1,2,3,7,9,10,12,17,22,28],sample_frequ:[0,3,9],sample_r:[0,2,3,7,9,10,12,17,22,34],samplingr:0,san:[0,5,9],saniti:0,satisfi:0,save:[0,9,26],savefig:[0,9],scalar:0,scale:[0,4,9],scan:[0,3,9],scientif:[0,7,9],scipi:[0,9,50],script:[0,8,51],search:[1,7,9,23],second:[0,3,7,9,10,23],section:[0,1,2,9],see:[0,2,4,9,28,30,49],seg:[0,25],seg_len:[0,3,4,7,9,19],seg_strid:[0,3,9],segfil:0,segfold:[0,9],seglist:[0,8,37,40],segment:[1,3,6,7,9,10,22,23,42,43,44],segmentlist:0,segmentplot:0,segnam:43,select:[0,23,32,51],semi:[0,9,43],send:[0,51],sens:[0,2,9],separ:[0,1,3,5,8,9,10,23,50],seri:[1,3,4,5,7,9,10,23,24,37,44,47,48,49],server:[8,49],serverdata:[0,8,45],set:[12,14,15,22,49],set_titl:0,set_trac:[0,9],set_xlim:[0,9],set_xscal:0,set_ylim:[0,9],set_yscal:[0,9],setframer:0,setnam:[0,9,21,44,46,47],setnchannel:0,setsampwidth:0,setup:[0,9,50],sever:[0,50],shape:[0,9],should:[0,8,49,50,51],show:[0,1,9],side:[0,4,9],signal:[1,2,3,9,28,30,49],simburstgaussian:0,simburstsinegaussian:0,similar:[0,1,7,9],simpli:[0,4,9,50],sin:[0,9],sinc:[0,9,30,50],sine:[0,49],singl:[0,7,9],slow:30,smaller:[0,4,9],smallest:[0,3,9],snglbursttabl:[0,7,9],snr:[0,7,9],softwar:[8,49,50,51],some:[0,49,50],somehow:[0,33],soon:[0,49],sort:0,sound:48,sourc:[0,1,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50],space:[0,1,5,9],span:[0,3,9],spec:[0,34],spec_corr:[0,4,5,6,9,13,14,15,18,30],specgram:34,specif:[0,8,9,43,46,50],specifi:[0,2,3,8,9],spectral:[1,2,10,11,33
,49],spectrogram:[9,40,49],spectrogramplot:0,spectrum:[0,1,9,19,24],spectrumplot:0,split:[0,1,3,9],sqrt:[0,4,9,40],squar:[0,3,4,5,9,28,30],src:[0,50],ssh:[0,51],stabl:[0,50],standard:[0,49],start:[0,7,8,9,33,43],start_freq:31,start_tim:[0,7,8,9,31,34],start_time_n:[0,7,9],starttim:45,stat:[0,9],state:[0,1,5,9],statement:[0,2,9],station:[0,8,9,21,23,32,33,37,40,42,45],std:[0,9],stderr:[0,2,9],step:[0,1,9,49,50],still:[0,4,9],stochast:[0,9],stop_tim:[0,7,9],stop_time_n:[0,7,9],store:[0,1,3,7,9,26,44],str:[41,43,45],strain:[0,2,9],strain_high_pass:[0,2,9],strang:[0,2,9],strategi:[0,1,9],stretch:22,stride:[3,22],string:[10,23,42,46,47],structur:[0,1,3,9],studi:0,sub:[0,9],sub_psd:29,subsequ:[0,3,9],sudo:[0,50],suffic:[0,50],suggest:49,sum:[0,1,5,9,26,28,30],sum_filt:[0,9],surviv:[0,9],swig:[0,3,9],symbol:[0,8],synchron:[0,9],sys:[0,2,9],system:[0,9,50,51],t0s:26,t0str:41,t1200125:[28,30],t_idx_max:[0,7,9,24],t_idx_max_off:[0,7,9],t_idx_min:[0,7,9,24],t_idx_off:[0,7,9],tab:[0,51],tabl:[0,7,9],tag:25,tarbal:[0,50],task:[0,49],tdiff:[0,9],technic:[0,1,9],templat:[0,9],temporari:[0,9],termin:[0,51],test2:0,test:[0,49],tf_map:[0,9,16,26,27],tfmap:31,than:[0,1,2,7,8,9],thei:[0,50],themselv:[0,5,9,15],theori:[0,1,9],therefor:[0,1,9],thi:[0,1,2,3,4,5,8,9,23,26,30,49,50,51],thing:[0,2,3,8,9,26],think:26,those:[0,51],threshold:[0,1,9,23,31],through:[0,3,9,49,51],throughout:[0,1,9],tile:[1,2,16,20,23,26,28,30],tile_bandwidth:[0,2,9,12],tile_energi:29,tile_fap:[0,8,9,23],time:[1,2,3,4,5,7,8,10,17,19,23,24,26,28,32,37,41,43,44,47,48,49],time_seri:0,timeseri:[0,9,10,23,24,48],timeseriesplot:0,timestamp:[0,41,45],titl:0,tlen:[0,9],tmp_filter_bank:[0,9],tmp_ts_data:[0,9],to_frequencyseri:[0,9],todo:[0,9],togeth:[0,1,9],tool:49,total:[0,1,2,3,5,9,12,23],tprint:[0,6,9],track:26,transform:[0,3,9,11,26],translat:[0,1,9],tri:[0,50],trigger:[1,6,23,49],trigger_list_from_map:[0,9],ts_data:[0,3,7,8,9,10,16,21,23,24],ts_list:[0,8,33,37,40],tsum:[0,9],tukei:[0,4,9,11],tune:[0,4,9],twice:[0,1,5,9
,28],two:[0,5,9,11,49,51],txt:0,type:[0,7,9],under:[0,9],undersampl:[0,9],uni:[0,51],uniformli:0,uninstal:[0,50],unit:[0,3,7,9,26],unix:[0,51],unpack:[0,50],unwhite_filter_ip:[0,5,9],unwhite_ss_ip:[0,5,9],unwhiten:[0,5,9,14,15,28,30],upon:[0,1,9],us_rat:[0,9],use:[0,1,3,7,9,12,51],used:[0,1,3,4,5,6,8,9,43],useful:[0,49,50],user:[1,2,3,7,8,9,23,45,49,50],usernam:[0,51],using:[0,1,2,3,6,7,9,43,44,50,51],usual:[0,49],utc:[0,41,43],uw_ss_ii:28,uw_ss_ij:28,valid:[0,9,11],valu:[0,1,3,5,7,8,9],valueerror:[0,9],variabl:[0,3,9,26],varianc:[0,1,3,9,26],variou:[0,9],vector:[0,9],verbos:[0,9],veri:[0,51],version:[0,9,50],vincent:[0,8],viridi:[0,9],virtual:49,vmax:0,vmin:0,w_ss_ij:28,wai:[0,26,50,51],want:0,warn:[0,2,9],wav:0,wave:[0,49],waveform:[0,9],wavelet:26,wax:0,webpag:[0,51],websit:[0,50],welch:[0,3,9],welcom:0,well:[0,3,9,23],what:[0,1,3,9],when:[0,4,9],where:[0,1,7,8,9,26],which:[0,1,2,3,4,5,9,28,50,51],white_filter_ip:[0,5,9],white_plot:0,white_ss_ip:[0,5,9],whiten:[3,4,5,28],whitened_spectrogram:0,who:[0,51],whole:37,why:[0,9],wide:[0,9],wider:26,width:[0,9],window:[4,11,22,23,24,51],window_fr:22,window_fract:[0,4,8,9,11,16,21,22,23],window_offset_epoch:[0,9],window_sigma_sq:[0,4,9],wing:[0,9],wish:[0,7,9,51],withen:[0,9,23],within:[0,9,23],withing:45,without:22,won:[0,9],word:[0,9],work:[1,9,26,49,50,51],would:[0,9],wout:0,write:[0,9],write_filenam:[0,9],writefram:0,written:[0,1,9],wspec:0,wtype:11,xlabel:0,xlim:0,xml:[23,25],xmldoc:[0,9],xrang:[0,9],xunit:0,year:[0,8],ylabel:0,ylim:0,you:[0,8,49,50,51],your:[0,8,50,51],z_j_b:[0,9,28,30],zero:[0,9,26],zpad:0},titles:["GNOME Data Analysis Software","Excess Power - Overview","Excess Power - Step 1: Checking filtering settings","Excess Power - Step 2: Estimate Power Spectral Density (PSD)","Excess Power - Step 3: Two point spectral correlation","Excess Power - Step 4: Computing the filter bank","Excess Power - Step 5: Normalization of virtual channel","Initialise event list and determine stride boundaries","Working 
Example","Excess-Power algorithm","gdas.epower.calculate_psd","gdas.epower.calculate_spectral_correlation","gdas.epower.check_filtering_settings","gdas.epower.compute_channel_renormalization","gdas.epower.compute_filter_ips_adjacent","gdas.epower.compute_filter_ips_self","gdas.epower.construct_tiles","gdas.epower.convert_to_time_domain","gdas.epower.create_filter_bank","gdas.epower.create_tf_plane","gdas.epower.create_tile_duration","gdas.epower.create_xml","gdas.epower.determine_output_segment","gdas.epower.excess_power","gdas.epower.identify_block","gdas.epower.make_filename","gdas.epower.make_indp_tiles","gdas.epower.make_tiles","gdas.epower.measure_hrss","gdas.epower.measure_hrss_poorly","gdas.epower.measure_hrss_slowly","gdas.epower.trigger_list_from_map","gdas.plots.plot_activity","gdas.plots.plot_asd","gdas.plots.plot_spectrogram","gdas.plots.plot_spectrogram_from_ts","gdas.plots.plot_spectrum","gdas.plots.plot_time_series","gdas.plots.plot_triggers","gdas.plots.plot_ts","gdas.plots.plot_whitening","gdas.retrieve.construct_utc_from_metadata","gdas.retrieve.create_activity_list","gdas.retrieve.file_to_segment","gdas.retrieve.generate_timeseries","gdas.retrieve.magfield","gdas.retrieve.retrieve_channel_data","gdas.retrieve.retrieve_data_timeseries","gdas.utils.create_sound","Welcome!","Installation","Multi-user 
Server"],titleterms:{"new":[0,9],"switch":[0,9],GPS:[0,9],access:[0,1,2,3,4,5,6,9],activ:0,algorithm:[0,9],amplitud:0,analys:[0,9],analysi:[0,49],apt:[0,50],artifici:0,asd:0,bandwidth:[0,9],bank:[0,5,9],base:0,block:[0,9],boundari:[0,7,9],calculate_psd:10,calculate_spectral_correl:11,channel:[0,6,9],check:[0,2,9],check_filtering_set:12,code:[1,2,3,4,5,6],comput:[0,5,9],compute_channel_renorm:13,compute_filter_ips_adjac:14,compute_filter_ips_self:15,concaten:0,construct:[0,9],construct_til:16,construct_utc_from_metadata:41,convert_to_time_domain:17,correl:[0,4,9],creat:[0,9],create_activity_list:42,create_filter_bank:18,create_sound:48,create_tf_plan:19,create_tile_dur:20,create_xml:21,data:[0,9],debian:[0,50],defin:[0,9],densiti:[0,3,9],determin:[0,7,9],determine_output_seg:22,differ:[0,9],durat:[0,9],each:[0,9],epow:[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],establish:0,estim:[0,3,9],event:[0,7,9],exampl:[0,8],excess:[0,1,2,3,4,5,6,9,49],excess_pow:23,explor:[0,9],extract:[0,9],fake:0,field:[0,9],file:[0,9],file_to_seg:43,filter:[0,2,5,9],find:[0,9],frequenc:[0,9],gda:[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],gener:[0,9],generate_timeseri:44,get:[0,49,50],given:[0,9],gnome:0,identify_block:24,initialis:[0,7,9],inject:0,instal:[0,50],introduct:0,invok:0,issu:0,lalsuit:[0,50],list:[0,7,9],mac:[0,50],macport:[0,50],magfield:45,magnet:[0,9],main:[0,50],make_filenam:25,make_indp_til:26,make_til:27,map:[0,9],measure_hrss:28,measure_hrss_poorli:29,measure_hrss_slowli:30,metadata:0,modul:[0,9],multi:[0,51],multipl:[0,9],normal:[0,6,9],output:[0,9],overview:[0,1,9],plot:[0,9,32,33,34,35,36,37,38,39,40],plot_act:32,plot_asd:33,plot_spectrogram:34,plot_spectrogram_from_t:35,plot_spectrum:36,plot_t:39,plot_time_seri:37,plot_trigg:38,plot_whiten:40,point:4,power:[0,1,2,3,4,5,6,9,49],precis:0,prepar:[0,9],produc:0,program:[0,50],psd:[0,3,9],rang:[0,9],real:0,result:[0,9],retriev
:[0,41,42,43,44,45,46,47],retrieve_channel_data:46,retrieve_data_timeseri:47,routin:[0,9],search:[0,49],segment:0,seri:0,server:[0,51],set:[0,2,9],signal:0,simul:0,softwar:0,sound:0,spectral:[0,3,4,9],spectrogram:0,start:49,step:[2,3,4,5,6],stride:[0,7,9],tile:[0,9],time:[0,9],tool:[0,50],trigger:[0,9],trigger_list_from_map:31,two:4,user:[0,51],util:[0,9,48],virtual:[0,6,9],welcom:49,whiten:[0,9],window:[0,9],work:[0,8]}}) \ No newline at end of file diff --git a/docs/server.html b/docs/server.html new file mode 100644 index 0000000..6d9d1ae --- /dev/null +++ b/docs/server.html @@ -0,0 +1,88 @@ + + + + + + + + Multi-user Server — gdas 0.2.9 documentation + + + + + + + + + + + + + + + + + +
+ + +
+

Multi-user Server

+

A GNOME JupyterHub, or multi-user server, has been created to allow each member to access the entire available dataset. Members who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. Anyone who is not part of the GNOME collaboration will not be granted access to the dataset but is free to use our software on their own data.

+

The server can be accessed in two ways, either by accessing the server’s webpage, or from your terminal through SSH:

+
ssh -X username@budker.uni-mainz.de -p 8022
+
+
+

While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after logging in and selecting Terminal:

+
+_images/jupyter1.png +
+

You can then use the terminal window to access files and create new Python scripts for your analysis.

+
+_images/jupyter2.png +
+
+ + +
+ + + + + \ No newline at end of file diff --git a/gdas/__init__.py b/gdas/__init__.py index 8ab14c5..ab70788 100644 --- a/gdas/__init__.py +++ b/gdas/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.2.9' +__version__ = '0.3.0' from .epower import * from .plots import * diff --git a/gdas/epower.py b/gdas/epower.py index 5522665..717789a 100644 --- a/gdas/epower.py +++ b/gdas/epower.py @@ -47,8 +47,6 @@ def excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,wi #print strain.insert_strain_option_group.__dict__ #print psd.insert_psd_option_group.__dict__ sample_rate = ts_data.sample_rate - print sample_rate - quit() nchans,band,flow = check_filtering_settings(sample_rate,nchans,band,fmin,fmax) seg_len,fd_psd,lal_psd = calculate_psd(ts_data,sample_rate,psd_segment_length,psd_segment_stride,psd_estimation) window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) @@ -414,7 +412,7 @@ def measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, w_ss_ij, delta_f, delta_t, filter_le s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() s_j_nb_avg *= delta_f s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \ - numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) + numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) # eqn. 62 uw_ups_ratio = s_j_nb_avg / s_j_nb_denom # eqn. 
63 -- approximation of unwhitened signal energy time series diff --git a/gdas/plots.py b/gdas/plots.py index 8749113..aaebb15 100644 --- a/gdas/plots.py +++ b/gdas/plots.py @@ -1,4 +1,4 @@ -import matplotlib,numpy +import matplotlib,numpy,mlpy matplotlib.use('Agg') from astropy.units import Quantity from matplotlib import pyplot @@ -8,6 +8,8 @@ from gwpy.spectrogram import Spectrogram from gwpy.table.lsctables import SnglBurstTable from gwpy.timeseries import TimeSeries +from pylab import * +from scipy import signal def plot_activity(full_seglist): """ @@ -208,3 +210,48 @@ def plot_tiles(): cnt += tmp plot_spectrogram(dof_tiles.T,fname='%s/tf_%ichans_%02idof.png'%(segfolder,nc_sum+1,2*j)) plot.savefig("%s/bands.png"%(segfolder)) + +def wavelet(ts_data): + z = numpy.array([float(i) for i in ts_data]) + t = numpy.array([float(i) for i in ts_data.sample_times]) + # Decimate magnetic field data to 1 sample/second + rate = [5,10,10] if ts_data.sample_rate==500 else [8,8,8] + for i in rate: + z = signal.decimate(z,i,zero_phase=True) + # Extract time every 500 sample + t = [t[n*ts_data.sample_rate] for n in range(len(t)/ts_data.sample_rate)] + # Convert every timing points to scale (hr,min,sec) units + s = 60. + t = [(t[i]-t[0])/s for i in range(len(t))] + # Do wavelet analysis + omega0 = 6 + fct = "morlet" + scales = mlpy.wavelet.autoscales(N=len(z),dt=1,dj=0.05,wf=fct,p=omega0) + spec = mlpy.wavelet.cwt(z,dt=1,scales=scales,wf=fct,p=omega0) + freq = (omega0 + numpy.sqrt(2.0 + omega0 ** 2)) / (4 * numpy.pi * scales[1:]) * 1000 + idxs = numpy.where(numpy.logical_or(freq<0.1,1000>sys.stderr, "Loaded %d time slides." % len(time_slides) +else: + time_slides = {} + + +# +# Make new time slides. +# + + +if options.verbose: + print >>sys.stderr, "Computing new time slides ..." 
+ +# dictionary mapping time_slide_id --> (dictionary mapping insrument --> offset) + +for time_slide in ligolw_tisi.SlidesIter(ligolw_tisi.parse_slides(options.instrument)): + time_slides[lsctables.TimeSlideTable.get_next_id()] = time_slide +for inspiral_slidespec in options.inspiral_num_slides: + for time_slide in ligolw_tisi.Inspiral_Num_Slides_Iter(*ligolw_tisi.parse_inspiral_num_slides_slidespec(inspiral_slidespec)): + time_slides[lsctables.TimeSlideTable.get_next_id()] = time_slide + +if options.verbose: + print >>sys.stderr, "Total of %d time slides." % len(time_slides) + + +# +# Remove duplicates. +# + + +if options.verbose: + print >>sys.stderr, "Identifying and removing duplicates ..." + +map(time_slides.pop, ligolw_tisi.time_slides_vacuum(time_slides, verbose = options.verbose).keys()) + +if options.verbose: + print >>sys.stderr, "%d time slides remain." % len(time_slides) + + +# +# Convert to list of (id, offset dictionary) tuples, sorted in reverse +# order by ID. This causes them to be written to the output files in order +# of ID. +# + + +time_slides = time_slides.items() +time_slides.sort(reverse = True) + + +# +# Normalize the time slides +# + + +if options.normalize: + if options.verbose: + print >>sys.stderr, "Normalizing the time slides ..." + time_slides = [(id, ligolw_tisi.time_slide_normalize(offsetdict, **options.normalize)) for id, offsetdict in time_slides] + + +# +# Make documents. +# + + +while time_slides: + # + # Create an empty document, populate the process information. + # + + xmldoc = new_doc() + timeslidetable = table.get_table(xmldoc, lsctables.TimeSlideTable.tableName) + process = ligolw_tisi.append_process(xmldoc, **options.__dict__) + + # + # How many slides will go into this file? + # + + N = int(round(float(len(time_slides)) / len(filenames))) + + # + # Put them in. 
+ # + + while N: + id, offsetdict = time_slides.pop() + for row in ligolw_tisi.RowsFromOffsetDict(offsetdict, id, process): + timeslidetable.append(row) + N -= 1 + + # + # Finish off the document. + # + + llwapp.set_process_end_time(process) + + # + # Write. + # + + filename = filenames.pop(0) + utils.write_filename(xmldoc, filename, options.verbose, gz = (filename or "stdout").endswith(".gz")) diff --git a/Makefile b/sphinx/Makefile similarity index 99% rename from Makefile rename to sphinx/Makefile index 7cd4598..21a36c2 100644 --- a/Makefile +++ b/sphinx/Makefile @@ -46,7 +46,7 @@ help: .PHONY: clean clean: - rm -rf ./docs/ + rm -rf ../docs/ .PHONY: html html: @@ -54,7 +54,7 @@ html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - cp -r ./_build/html/ ./docs/ + cp -r ./_build/html/ ../docs/ rm -rf generated _build *~ .PHONY: dirhtml diff --git a/sphinx/backup.rst b/sphinx/backup.rst new file mode 100644 index 0000000..85ce8b7 --- /dev/null +++ b/sphinx/backup.rst @@ -0,0 +1,1406 @@ +GNOME Data Analysis Software +============================ + +* :ref:`test2` + + .. _test2: + +.. toctree:: + :maxdepth: 2 + :caption: People & By-laws + + test + +.. toctree:: + :maxdepth: 2 + + index.rst + +Introduction +============ + +This package contains functions useful for magnetic field signal processing, with a focus on Excess Power search analysis and application on the data for the GNOME collaboration, see `Pustelny et al. (2013) `_. This documentation details all the available functions and tasks available through this software. Here are some example tasks that can (or will soon to) be handled: + +* Plot usual time series and spectrogram of magnetic field data. +* Perform excess power analysis and plot detected triggers in time-frequency map. +* Create artificial data for testing data analysis. +* Inject fake signal of different bandwidth and durations. 
+* Cross-correlation of continuous sine wave signals. +* Perform Allan Standard deviation. + +.. raw:: html + + Fork me on GitHub + +Installation +============ + +The program requires the following general packages to run: `Numpy `_, `Matplotlib `_, `Scipy `_ and `Astropy `_. The following LIGO-related packages are also required for full functionality: `Gwpy `_, `PyCBC `_, `Glue `_, `LAL `_, `LALburst `_ and `LALsimulation `_. + +While most of the packages can be installed automatically using `pip `_, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called `LALsuite `_ which can be installed fairly easily on Debian (Linux) and Mac OS machines. + +LALsuite tools +-------------- + +Some useful pages on how to download and install the LIGO software can be found `here `_. + +MacPorts (Mac) +~~~~~~~~~~~~~~ + +For Mac users, the installation is pretty easy, detailed information can be found on `this page `_. You need to have `MacPorts `_ installed. The following commands should suffice to install the LALsuite package on your machine:: + + sudo port install lscsoft-deps + sudo port install glue + sudo port install lalapps + +The first command will install all the dependencies needed for the LIGO software to be installed. The following 2 commands will install the actual packages. + +apt-get (Debian) +~~~~~~~~~~~~~~~~ + +Since the LIGO software is not a default package in the apt package manager system on Debian machine, additional steps will be needed. 
The first step is to add the following links to the source list located at ``/etc/apt/sources.list``:: + + deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + +Note that the ``[arch=amd64]`` is needed to fix the architecture problem in case it tries to install i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:: + + apt-get install build-essential automake autoconf libtool devscripts + +The LIGO software can finally be installed using the following command:: + + apt-get install lscsoft-all + +Main Program +------------ + +The best way to install the GNOME software along with the rest of the dependencies is by using `pip`:: + + pip install gdas + +(You may need to put a ``sudo`` in front of this). For this to work +you need to have `pip +`_ installed. This +method allows for easy uninstallation. + +You can also simply download the tarball from the PyPI website, unpack it and then do:: + + python setup.py install + +The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. +The development version can be downloaded from `here `_. + +Multi-user Server +================= + +A GNOME JupyterHub, or multi-user server has been created to allow each member to access the entire available dataset. Member who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. Member who are not part of the GNOME collaboration will not be granted access to the dataset but are free to use our software on their own data. + +The server can be accessed in two ways, either by acceding the `server's webpage `_, or from your terminal through SSH:: + + ssh -X username@budker.uni-mainz.de -p 8022 + +While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. 
Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after login and select Terminal: + +.. figure:: img/jupyter1.png + :width: 70% + :align: center + +You can then use the terminal window to access files and create new Python scripts for your analysis. + +.. figure:: img/jupyter2.png + :width: 70% + :align: center + +Working Example +=============== + +Either on your own computer or on the server, on a Jupyter notebook or on a Python script, the first thing to do is to import the ``gdas`` package that contain all the modules present in the GNOME software. That can be done easily by doing the following:: + + import gdas + +In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:: + + station = 'fribourg01' + start_time = '2016-11-03-04' + end_time = '2016-11-03-04-2' + +where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified. + +If you are not working on the server and the data are located in a different repository than ``/GNOMEDrive/gnome/serverdata/``, a custom path can be defined. 
For instance:: + + datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/' + +The magnetic field data can then be retrieve as follows:: + + ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath) + +The ``gdas.magfield`` method will return 3 arrays of data that can then be used to produce different plots:: + + gdas.plot_activity(activity) + gdas.plot_time_series(station,ts_list,seglist=activity) + gdas.plot_asd(station,ts_list) + gdas.plot_whitening(station,ts_list,activity) + +This is a script to do Excess Power analysis:: + + psd_segment_length = 60 + psd_segment_stride = 30 + psd_estimation = 'median-mean' + window_fraction = 0 + tile_fap = 1e-5 + channels = 250 + + gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels) + gdas.plot_triggers() + +Data extraction +=============== + +Extracting real data +-------------------- + +Retrieve metadata +~~~~~~~~~~~~~~~~~ + +The first step is to define some variables related to which data we want to study and their location. The ``os.path.join`` method will join that different paths called as arguments (i.e. 
in the parenthesis):: + + # Set name of the channel to extract + setname = "MagneticFields" + # Define station name and map + station = "fribourg01" + # Define year, month and day + year,month,day = '2016','11','03' + # Define path to main data repository + path1 = '/Users/vincent/ASTRO/data/GNOMEDrive/gnome/serverdata/' + # Define path to day repository + path2 = "%s/%s/%s/%s/"%(station,year,month,day) + # Define generic hdf5 filenames + path3 = "%s_%s%s%s_*.hdf5"%(station,year,month,day) + # Define full generic path name + fullpath = os.path.join(path1,path2,path3) + +We then use the `glob `_ module to list all the files that satisfy the full path name and loop over each HDF5 file and do the following: + +- Extract its metadata using the `h5py `_ package; +- Calculate the time segment to which the data corresponds using the :ref:`file_to_segment ` function; +- Store each filename and metadata in two different dictionary variables, ``file_order`` and ``data_order``. + +Finally, we extract the sampling rate from one of the files, which will be used later in the analysis. The sampling rate is the same for all the data files:: + + # Initialising dictionary for data + file_order,data_order = {},{} + # Loop over all existing data files + for fname in glob.glob(fullpath): + # Read hdf5 file + hfile = h5py.File(fname, "r") + # Extract segment information from file + segfile = file_to_segment(hfile,setname) + # Associate file in dictionary with association to segment data + file_order[segfile] = fname + data_order[segfile] = hfile + # Retrieve sampling rate from last read file + sample_rate = hfile[setname].attrs["SamplingRate(Hz)"] + +Creating segment lists +~~~~~~~~~~~~~~~~~~~~~~ + +This section will create a continuous list of all the data segments available. We use the following modules in order to create the list properly: + +- The `segmentlist `_ module from the ``glue.segments`` library defines the list of segments. 
The =coalesce()= method is then used to put all the segments in coalesced state. +- The `DataQualityDict `_ module from the ``gwpy.segments`` library allows to store all the data segments in an ordered dictionary. +- The `DataQualityFlag `_ module from the ``gwpy.segments`` library allows to *record times during which the instrument was operating outside of its nominal condition*. + +The script is as follows:: + + # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data + seglist = segmentlist(data_order.keys()) + # Sort the segment list + seglist.sort() + # Initialise dictionary for segment information + full_seglist = DataQualityDict() + # Save time span for each segment in ASCII file + with open("segments.txt", "w") as fout: + for seg in seglist: + print >>fout, "%10.9f %10.9f" % seg + # FIXME: Active should be masked from the sanity channel + full_seglist[station] = DataQualityFlag(station,active=seglist.coalesce(),known=seglist.coalesce()) + # Define start and end time of entire dataset + start, end = full_seglist[station].active.extent() + +Establishing active times +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here's the script:: + + # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data + seglist = segmentlist(data_order.keys()) + # Sort the segment list + seglist.sort() + # Import gwpy tools + plot = SegmentPlot() + # Initialize plotting figure + ax = plot.gca() + # Plot all segment in figure + ax.plot(full_seglist) + # Save figure + pyplot.savefig("activity.png",dpi=500) + +Retrieve and concatenate the data. 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here's the script:: + + # Generate time series for the ensemble of data + data_list = generate_timeseries(file_order,setname) + # Retrieve channel data for all the segments + full_data = numpy.hstack([retrieve_channel_data(data_order[seg],setname) for seg in seglist]) + # Define log base 2 of the total time length of the full data + loglength = math.log(len(full_data)/sample_rate, 2) + # Define zero padding + zpad = math.ceil(loglength) + zpad = int(2**zpad) - len(full_data)/sample_rate + zpad = numpy.zeros(int(zpad*sample_rate / 2.0)) + # Include padding next to the data + full_data = numpy.hstack((zpad, full_data, zpad)) + # Models a time series consisting of uniformly sampled scalar values + ts_data = types.TimeSeries(full_data,delta_t=1/sample_rate,epoch=seglist[0][0]) + # Loop over all the elements in the dictionary + for v in data_order.values(): + # Close the element + v.close() + +Producing fake data +------------------- + +Create simulated time series data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is easy to create fake data, one can use the `numpy.random.normal `_ method from the Numpy library to draw random samples from a normal Gaussian distribution with mean of 0, standard deviation of 1, and a length equal to the sampling rate (``args.sample_rate``) times the length in seconds of individual segments (``args.psd_segment_length``) times the number of segment the user wish to produce. After defining the starting UTC time, one can then create a time series of the data using the `TimeSeries `_ module from the ``gwpy.timeseries`` library.:: + + print "Create fake data..." 
+ start = 1153742437.0 + end = start + args.psd_segment_length * 16 + station = "gaussian-noise" + setname = "MagneticFields" + full_data = numpy.random.normal(0, 1, int(args.sample_rate * args.psd_segment_length * 16)) + ts_data = TimeSeries(full_data, sample_rate=args.sample_rate,epoch=start) + +Produce and plot fake signal +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here's the script:: + + delta_t = 1.0/args.sample_rate + filter_band = 4 + #q = math.sqrt(2)*f_0/filter_band * 2 + #f_0 = 18 + duration = 0.1 + hrss = 0.0275 + #hp, hx = SimBurstSineGaussian(q * 2, f_0, hrss, 1, 0, data_dt) + hp, hx = SimBurstGaussian(duration, hrss, delta_t) + hp = TimeSeries.from_lal(hp) + hx = TimeSeries.from_lal(hx) + # We rescale the amplitude to hide or expose it in the data a bit better + hp *= 100. + + pyplot.figure() + pyplot.plot(hp.times, hp, 'k-') + pyplot.xlim([-0.5, 0.5]) + pyplot.ylim([-0.1, 0.1]); + pyplot.xlabel('Time (s)') + pyplot.ylabel('Magnitude') + pyplot.savefig('fakesignal.png') + pyplot.close() + +Inject fake signal into artificial data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here's the script:: + + random_time = int((start+end)/2.) 
+ st = (random_time-start)*args.sample_rate - len(hp)/2 + en = st + len(hp) + hp.epoch = random_time + ts_data[st:en] += hp + data_list = [ts_data] + ts_data = types.TimeSeries(ts_data.value,delta_t=1.0/args.sample_rate,epoch=start) + +Plotting Data +============= + +Generate a plot of the data time series +--------------------------------------- + +Here's the script:: + + # Include time series element in dictionary + plot = TimeSeriesPlot() + # Create axis in plot + ax = plot.gca() + # Loop over all the time series + for ts in data_list: + # Plot time series for each segment + ax.plot(ts, color='blue') + # Display title + ax.set_title(station) + # Plot activity segments + plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor': 'g','edgecolor': 'k'}) + # Define edges of the x axis + ax.set_xlim(start, end) + # Save figure + plot.savefig('time_series.png',dpi=500) + +Create sound based on the data +------------------------------ + +Here's the script:: + + wout = wave.open("pure_tone.wav", "w") + wout.setnchannels(1) # mono + wout.setsampwidth(4) # 32 bit audio + wout.setframerate(1000) + wout.writeframes(ts[:]) + wout.close() + +Invoking precision issues +------------------------- + +AGG complexity starts to complain with large numbers of points and we somehow invoke precision issues that need to be ameliorated:: + + for d in data_list: + d.x0 = Quantity(int(d.x0.value * 500), d.xunit) + d.dx = Quantity(1, d.xunit) + data_list.coalesce() + for d in data_list: + d.x0 = Quantity(d.x0.value / 500, d.xunit) + d.dx = Quantity(0.002, d.xunit) + +Amplitude Spectral Density (ASD) +-------------------------------- + +Here's the script:: + + # Initialize plotting functionality + plot = SpectrumPlot() + # Loop over all the time series + for d in data_list: + # Generate 8 seconds per FFT with 4 second (50%) overlap + spectrum = d.asd(8, 4) + # Create plotting axis + ax = plot.gca() + # Plot square root of the spectrum + 
ax.plot(numpy.sqrt(spectrum)) + # Set x axis to log scale + ax.set_xscale('log') + # Set y axis to log scale + ax.set_yscale('log') + # Set x axis limits + ax.set_xlim(1e-1, 500) + # Save figure + plot.savefig("asd.png",dpi=500) + +(Un)normalized Spectrograms +--------------------------- + +The first thing to do is to initialise the plotting axis for both figure as well as some display settings specific to spectrogram and which can be loaded using the `SpectrogramPlot() `_ module from the ``gwpy.plotter`` library:: + + plot = SpectrogramPlot() + ax = plot.gca() + white_plot = SpectrogramPlot() + wax = white_plot.gca() + +The spectrogram is then created using the `spectrogram `_ function from the ``gwpy.timeseries.TimeSeries`` package. This will *calculate the average power spectrogram of this TimeSeries using the specified average spectrum method* (default being the Welch's method). We define the 3 following variables that will be used to construct the spectrogram: + +- ``stride``: number of seconds in single PSD (column of spectrogram), default 20; +- ``fftlength``: number of seconds in single FFT, default 6; +- ``overlap``: number of seconds between FFTs, default 3. + +We can then loop over all the time series made from each loaded HDF5 data file, and construct the spectrogram for each time series. The whitening of the spectrogram is then done by normalisation it, which can be performed using the `ratio `_ method from the ``gwpy.spectrogram.Spectrogram`` library. This will calculate the ratio of the created spectrogram against a specific reference, here we chose the reference to be the median of each spectrum in the given spectrogram: + +.. 
math:: + \sqrt{S(f,t)}/\sqrt{\overline{S(f)}} + +The script is as follows:: + + for ts in data_list: + if (len(ts) * ts.dt).value < stride: + continue + spec = ts.spectrogram(stride, fftlength=fftlength, overlap=overlap) + ax.plot(spec) + wspec = spec.ratio('median') + wax.plot(wspec, vmin=0.1, vmax=100) + +Finally, the plot can be completed by including the activity period below each figure:: + + ax.set_title(station) + ax.set_xlim(seglist[0][0], seglist[-1][1]) + ax.set_ylim(1e-1, 500) + ax.set_yscale('log') + plot.add_colorbar(log=True) + plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'}) + plot.savefig("spectrogram.png",dpi=500) + + wax.set_title(station) + wax.set_xlim(seglist[0][0], seglist[-1][1]) + wax.set_ylim(1e-1, 500) + wax.set_yscale('log') + white_plot.add_colorbar(log=True) + white_plot.add_state_segments(SegmentList(full_seglist[station].active),plotargs={'label':'data present','facecolor':'g','edgecolor':'k'}) + white_plot.savefig("whitened_spectrogram.png",dpi=500) + +Excess-Power algorithm +====================== + +General overview +---------------- + +The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search of Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (200) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. (2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. + +We present below a step-by-step procedure followed during the Excess Power search analysis. 
For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. + +- :ref:`Time domain segmentation and PSD estimate ` + + We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. + +- :ref:`Comb of frequency channels ` + + We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. + +- :ref:`Creating analysing blocks ` + + The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel. 
+ +- :ref:`Creating tiles with different bandwidth ` + + We can now construct tiles with different bandwidth by summing multiple channels together. + +- :ref:`Exploring tiles with different duration ` + + For each given tile's bandwidth, one can investigate different tile's duration. This can be done by exploring different number of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing the :math:`d`, one can explore multiple tile's duration for different bandwidth. + +- :ref:`Define triggering signal ` + + The energy of each tile in the time-frequency space is calculated and compare to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define a energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. + +.. figure:: ./img/overview.png + + Overview of the Excess Power method and difference between segments, channels, tiles and blocks. + +.. _psdestimate: + +Estimate Power Spectral Density (PSD) +------------------------------------- + +The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. 
The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating values. :: + + # Convert time series as array of float + data = ts_data.astype(numpy.float64) + +The PSD is calculated by splitting up the signal into overlapping segments and scan through each segment to calculate individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: + + # Average method to measure PSD from the data + avg_method = args.psd_estimation + +One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in second units with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into sample unit. :: + + # The segment length for PSD estimation in samples + seg_len = int(args.psd_segment_length * args.sample_rate) + # The separation between consecutive segments in samples + seg_stride = int(args.psd_segment_stride * args.sample_rate) + +We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce invidual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, and return the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin. 
:: + + # Lifted from the psd.from_cli module + fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) + # Plot the power spectral density + plot_spectrum(fd_psd) + # We need this for the SWIG functions + lal_psd = fd_psd.lal() + +One can display the power measurements, frequency array and frequency between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: + + print 'Display power measurements of the first 10 frequency bins' + print fd_psd[:10] + print 'Display central frequency of the first 10 bins' + print fd_psd.sample_frequencies[:10] + print 'Display the frequency separation between bins' + print fd_psd.delta_f + +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. highest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half de the sampling rate. + +Checking filtering settings +--------------------------- + +The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only let pass frequency that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater from the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: + + if args.min_frequency < args.strain_high_pass: + print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) + +In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as a larger signal will not be able to get easily identifiable. 
:: + + if args.max_frequency is None: + args.max_frequency = args.sample_rate / 2.0 + +If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args ` or the number of frequency channels (=--channels= argument) is not defined but the total spectral band is (``data_band``), one can then determined all the filter settings as follows: :: + + + if args.tile_bandwidth is None and args.channels is None: + # Exit program with error message + exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") + else: + # Define as assert statement that tile maximum frequency larger than its minimum frequency + assert args.max_frequency >= args.min_frequency + # Define spectral band of data + data_band = args.max_frequency - args.min_frequency + # Check if tile bandwidth or channel is defined + if args.tile_bandwidth is not None: + # Define number of possible filter bands + nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 + elif args.channels is not None: + # Define filter bandwidth + band = args.tile_bandwidth = data_band / (args.channels + 1) + assert args.channels > 1 + +The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: + + # Lowest frequency of the first filter + flow = args.min_frequency + +Whitening window and spectral correlation +----------------------------------------- + +This part determines how much data on either side of the tukey window is to be discarded. Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8, smaller windows will have fractions of integers, but larger powers of two will still preseve this (probably not a big deal in the end). 
:: + + window_fraction = 0 + +The two point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: + + # Do two point spectral correlation + window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) + window = window.data.data + window_sigma_sq = numpy.mean(window**2) + # Pre scale the window by its root mean squared -- see eqn 11 of EP document + #window /= numpy.sqrt(window_sigma_sq) + +.. _filterbank: + +Computing the filter bank +------------------------- + +The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in an increment equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM). :: + + # Define filters + filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) + +This function will return 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``). 
The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as follows: :: + + # Print data of first channel's filter + print filter_bank[0].data.data + # Print frequency separation between 2 values in the first channel's filter + print filter_bank[0].deltaF + # Print first frequency of the first channel's filter + print filter_bank[0].f0 + # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) + print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF + +Further in the analysis, the following filters will be used: +1. ``white_filter_ip``: Whitened filter inner products computed with themselves. +2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. +3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. +4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. + +:: + + # This is necessary to compute the mu^2 normalizations + white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) + unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) + # These two are needed for the unwhitened mean square sum (hrss) + white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) + unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) + +Normalization of virtual channel +-------------------------------- + +The virtual channels will be used during the excess-power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function. 
:: + + # Initialise dictionary + mu_sq_dict = {} + # nc_sum additional channel adds + for nc_sum in range(0, int(math.log(nchans, 2))): + min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 + print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) + nc_sum = 2**nc_sum - 1 + mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) + +Initialise event list and determine stride boundaries +----------------------------------------------------- + +First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such table is commonly known as ``lsctables``. A pre-defined LSC table can be constructed using the ``New`` function from the `glue.ligolw.lsctables `_ module. We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record. :: + + # Create event list for single burst table + event_list = lsctables.New(lsctables.SnglBurstTable, + ['start_time','start_time_ns','peak_time','peak_time_ns', + 'duration','bandwidth','central_freq','chisq_dof', + 'confidence','snr','amplitude','channel','ifo', + 'process_id','event_id','search','stop_time','stop_time_ns']) + +We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in sample unit for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in sample unit is determined and added to both starting and ending time indexes. 
:: + + # Determine boundaries of stride in time domain + t_idx_min, t_idx_max = 0, seg_len + # Check if user requested starting time is defined + if args.analysis_start_time is not None: + # Define the time difference in seconds between data and user requested starting times + t_idx_off = args.analysis_start_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_off = int(t_idx_off * args.sample_rate) + else: + # Define index of the starting point as first value in data + t_idx_off = 0 + # Initialise minimum index values as offset starting index + t_idx_min += t_idx_off + # Initialise maximum index values as offset starting index + t_idx_max += t_idx_off + +Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ``ts_data``. :: + + # Check if user requested end time is defined + if args.analysis_end_time is not None: + # Define the time difference between data and user requested ending times + t_idx_max_off = args.analysis_end_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_max_off = int(t_idx_max_off * args.sample_rate) + else: + # Define index of the ending point as the length of data array + t_idx_max_off = len(ts_data) + +.. _analysingblocks: + +Define analysing blocks +----------------------- + +The first thing we do is to calculate the time series for the segment that is covered (``tmp_ts_data``) and redefined the metadata, especially the time of the first sample in seconds which is defined by the ``epoch`` argument and is different for every segment. After plotting the time series for that segment, the data are then converted into frequency series (``fs_data``) using the `to_frequencyseries `_ module from the ``pycbc.types.timeseries.TimeSeries`` library. Finally, the frequency data are then whitened. 
:: + + # Loop over each data within the user requested time period + while t_idx_max <= t_idx_max_off: + # Define starting and ending time of the segment in seconds + start_time = ts_data.start_time + t_idx_min/float(args.sample_rate) + end_time = ts_data.start_time + t_idx_max/float(args.sample_rate) + print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off)) + # Model a withen time series for the block + tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time) + # Save time series in segment repository + segfolder = 'segments/%i-%i'%(start_time,end_time) + os.system('mkdir -p '+segfolder) + plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder)) + # Convert times series to frequency series + fs_data = tmp_ts_data.to_frequencyseries() + print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2 + # Whitening (FIXME: Whiten the filters, not the data) + fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f) + print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2 + +Create time-frequency map for each block +---------------------------------------- + +We initialise a 2D zero array for a time-frequency map (``tf_map``) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the ``nchans`` variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the ``seg_len`` variable. 
:: + + # Initialise 2D zero array for time-frequency map + tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128) + +We also initialise a zero vector for a temporary filter bank (``tmp_filter_bank``) that will store, for a given channel, the filter's values from the original filter bank (``filter_bank``) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (``fd_psd``). :: + + # Initialise 1D zero array + tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128) + +We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel's filter from the original filter bank. :: + + # Loop over all the channels + print tprint(t0,t1),"Filtering all %d channels..." % nchans + for i in range(nchans): + # Reset filter bank series + tmp_filter_bank *= 0.0 + # Index of starting frequency + f1 = int(filter_bank[i].f0/fd_psd.delta_f) + # Index of ending frequency + f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1 + # (FIXME: Why is there a factor of 2 here?) + tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2 + +We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel. :: + + # Define the template to filter the frequency series with + template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False) + +Finally, we use the `matched_filter_core `_ module from the ``pycbc.filter.matchedfilter`` library to filter the frequency series from the channel. 
This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector. :: + + # Create filtered series + filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None, + low_frequency_cutoff=filter_bank[i].f0, + high_frequency_cutoff=filter_bank[i].f0+2*band) + +The `matched filter `_ is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed. :: + + # Include filtered series in the map + tf_map[i,:] = filtered_series[0].numpy() + +The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of sample present in one segment of data, i.e. segment's length in seconds times the the sampling rate. The map can finally be plotted with a :math:`\Delta t` corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and :math:`\Delta f` is equal to the bandwidth of one channel. :: + + plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder)) + +.. _tilebandwidth: + +Constructing tiles of different bandwidth +----------------------------------------- + +First and foremost, we define a clipping region in the data to be used to remove window corruption, this is non-zero if the ``window_fraction`` variable is set to a non-zero value. :: + + print tprint(t0,t1),"Beginning tile construction..." + # Clip the boundaries to remove window corruption + clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2) + +In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. 
We first need to setup a loop such that the maximum number of additional channel is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (``nc_sum``) would therefore be equal to 2 to the power of the current quantity of additional channels. :: + + for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds + nc_sum = 2**nc_sum - 1 + print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1) + +The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to ``band * (nc_sum + 1)``. :: + + us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t))) + print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate) + +"Virtual" wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time frequency map for this tile using the :ref:`make_indp_tiles ` internal function. In other word, we are constructing multiple sub-tiles for which we can determined the respective energy in the given frequency band. :: + + mu_sq = mu_sq_dict[nc_sum] + sys.stderr.write("\t...calculating tiles...") + if clip_samples > 0: + tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq) + else: + tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq) + sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape) + print >>sys.stderr, " done" + print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles)) + +.. _tileduration: + +Explore multiple tile durations +------------------------------- + +Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. 
We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the ``--max-duration`` argument. If not, the value will be set to 32. :: + + if args.max_duration is not None: + max_dof = 2 * args.max_duration * (band * (nc_sum+1)) + else: + max_dof = 32 + assert max_dof >= 2 + +Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth. :: + + print "\t\t...getting longer durations..." + #for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]: + for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]: + sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j)) + #tlen = tiles.shape[1] - j + 1 + tlen = tiles.shape[1] - 2*j + 1 + 1 + if tlen <= 0: + print >>sys.stderr, " ...not enough samples." + continue + dof_tiles = numpy.zeros((tiles.shape[0], tlen)) + #:sum_filter = numpy.ones(j) + # FIXME: This is the correct filter for 50% overlap + sum_filter = numpy.array([1,0] * (j-1) + [1]) + #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1]) + for f in range(tiles.shape[0]): + # Sum and drop correlate tiles + # FIXME: don't drop correlated tiles + #output = numpy.convolve(tiles[f,:], sum_filter, 'valid') + dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid') + print >>sys.stderr, " done" + print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles)) + level_tdiff = time.time() - tdiff + print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff + +Finally, the bandwidth and duration of the tile can be defined as followed: :: + + # Current bandwidth of the time-frequency map tiles + current_band = band * (nc_sum + 1) + # How much each "step" is in the frequency domain -- almost + # assuredly the fundamental bandwidth + df = current_band + # How much each "step" is in the time domain -- under sampling rate + # FIXME: THis won't work if the sample rate isn't a power of 2 + dt = 1.0 / 2 / (2 * 
current_band) * 2 + full_band = 250 + dt = current_band / full_band * ts_data.sample_rate + dt = 1.0/dt + # Duration is fixed by the NDOF and bandwidth + duration = j / 2.0 / current_band + +.. _triggerfinding: + +Trigger finding +--------------- + +In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such threshold can be determined by using the /inverse survival function/ method from the `scipy.stats.chi2 `_ package. :: + + threshold = scipy.stats.chi2.isf(args.tile_fap, j) + print "Threshold for this level: %f" % threshold + #if numpy.any(dof_tiles > threshold): + #plot_spectrogram(dof_tiles.T) + #import pdb; pdb.set_trace() + +Once the threshold is set, one can then run the :ref:`trigger_list_from_map ` function to quickly find the trigger signal from the ``dof_tiles`` array that :: + + # Since we clip the data, the start time needs to be adjusted accordingly + window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2 + trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None) + for event in event_list[::-1]: + if event.amplitude != None: + continue + etime_min_idx = float(event.get_start()) - float(fs_data.epoch) + etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t) + etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration + etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t) + # (band / 2) to account for sin^2 wings from finest filters + flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band) + fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band) + # TODO: Check that the undersampling rate is always commensurate + # with the indexing: that is to say that + # mod(etime_min_idx, us_rate) == 0 always + z_j_b = 
tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate] + # FIXME: Deal with negative hrss^2 -- e.g. remove the event + try: + event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof) + except ValueError: + event.amplitude = 0 + + print "Total number of events: %d" % len(event_list) + +Switch to new block +------------------- + +The following will move the frequency band to the next segment: :: + + tdiff = time.time() - tdiff + print "Done with this block: total %f" % tdiff + + t_idx_min += int(seg_len * (1 - window_fraction)) + t_idx_max += int(seg_len * (1 - window_fraction)) + +Extracting GPS time range +------------------------- + +We use the `LIGOTimeGPS `_ structure from the =glue.lal= package to /store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference/. Once both times are defined, the range of value is stored in a semi-open interval using the `segment `_ module from the =glue.segments= package. 
:: + + # Starting epoch relative to GPS starting epoch + start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time) + # Ending epoch relative to GPS ending epoch + end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time) + # Represent the range of values in the semi-open interval + inseg = segment(start_time,end_time) + +Prepare output file for given time range +---------------------------------------- + +:: + + xmldoc = ligolw.Document() + xmldoc.appendChild(ligolw.LIGO_LW()) + + ifo = args.channel_name.split(":")[0] + proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date) + + # Figure out the data we actually analyzed + outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction) + + ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg) + + for sb in event_list: + sb.process_id = proc_row.process_id + sb.search = proc_row.program + #sb.ifo, sb.channel = args.channel_name.split(":") + sb.ifo, sb.channel = station, setname + + xmldoc.childNodes[0].appendChild(event_list) + fname = make_filename(station, inseg) + + utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True) + +Plot trigger results +-------------------- + +:: + + events = SnglBurstTable.read(fname+'.gz') + #del events[10000:] + plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr") + #plot = events.plot('time', 'central_freq', color='snr') + #plot.set_yscale("log") + plot.set_ylim(1e-0, 250) + t0 = 1153742417 + plot.set_xlim(t0 + 0*60, t0 + 1*60) + #plot.set_xlim(t0 + 28, t0 + 32) + pyplot.axvline(t0 + 30, color='r') + cb = plot.add_colorbar(cmap='viridis') + plot.savefig("triggers.png") + +Module Access +============= + +Extract Magnetic Field Data +--------------------------- + +Extract magnetic field data from HDF5 files. + +.. 
currentmodule:: gdas.retrieve + +.. autosummary:: + :toctree: generated/ + + magfield + file_to_segment + construct_utc_from_metadata + generate_timeseries + create_activity_list + retrieve_data_timeseries + retrieve_channel_data + +Plotting routines +----------------- + +Methods to produce time-frequency plots and others + +.. currentmodule:: gdas.plots + +.. autosummary:: + :toctree: generated/ + + plot_activity + plot_time_series + plot_asd + plot_whitening + plot_ts + plot_spectrum + plot_spectrogram + plot_spectrogram_from_ts + plot_triggers + +Excess Power Search Analysis +---------------------------- + +Main class to do excess-power search analysis + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + excess_power + check_filtering_settings + calculate_psd + calculate_spectral_correlation + create_filter_bank + convert_to_time_domain + identify_block + create_tf_plane + compute_filter_ips_self + compute_filter_ips_adjacent + compute_channel_renormalization + measure_hrss + measure_hrss_slowly + measure_hrss_poorly + trigger_list_from_map + determine_output_segment + make_tiles + make_indp_tiles + make_filename + construct_tiles + create_tile_duration + create_xml + +Utilities +--------- + +Independent routines to do various other things + +.. currentmodule:: gdas.utils + +.. autosummary:: + :toctree: generated/ + + create_sound + + +.. _file_to_segment: + +.. ** Extract segment information +.. +.. The starting and ending UTC times for a specific HDF5 file are determined by using the =Date=, =t0= and =t1= attributes from the metadata. The [[construct_utc_from_metadata][=construct_utc_from_metadata=]] function is then used to calculate the UTC time. Finally, the [[http://software.ligo.org/docs/glue/glue.__segments.segment-class.html][=segment=]] module from the =glue.segments= library is used to represent the range of times in a semi-open interval. +.. +.. #+BEGIN_SRC python +.. def file_to_segment(hfile,segname): +.. 
# Extract all atributes from the data +.. attrs = hfile[segname].attrs +.. # Define each attribute +.. dstr, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"] +.. # Construct GPS starting time from data +.. start_utc = construct_utc_from_metadata(dstr, t0) +.. # Construct GPS starting time from data +.. end_utc = construct_utc_from_metadata(dstr, t1) +.. # Represent the range of times in the semi-open interval +.. return segment(start_utc,end_utc) +.. #+END_SRC +.. +.. ** Constructing UTC from metadata +.. <> +.. +.. #+BEGIN_SRC python +.. def construct_utc_from_metadata(datestr, t0str): +.. instr = "%d-%d-%02dT" % tuple(map(int, datestr.split('/'))) +.. instr += t0str +.. t = Time(instr, format='isot', scale='utc') +.. return t.gps +.. #+END_SRC +.. +.. ** Generate time series +.. <> +.. +.. #+BEGIN_SRC python +.. def generate_timeseries(data_list, setname="MagneticFields"): +.. full_data = TimeSeriesList() +.. for seg in sorted(data_list): +.. hfile = h5py.File(data_list[seg], "r") +.. full_data.append(retrieve_data_timeseries(hfile, "MagneticFields")) +.. hfile.close() +.. return full_data +.. #+END_SRC +.. +.. ** Retrieve data time series +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_data_timeseries(hfile, setname): +.. dset = hfile[setname] +.. sample_rate = dset.attrs["SamplingRate(Hz)"] +.. gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"]) +.. data = retrieve_channel_data(hfile, setname) +.. ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch) +.. return ts_data +.. #+END_SRC +.. +.. ** Retrieve channel data +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_channel_data(hfile, setname): +.. return hfile[setname][:] +.. #+END_SRC +.. +.. .. _calculate_spectral_correlation: +.. +.. ** Two point spectral correlation +.. +.. For our data, we apply a Tukey window whose flat bit corresponds to =window_fraction= (in percentage) of the segment length (in samples) used for PSD estimation (i.e. =fft_window_len=). 
This can be done by using the [[http://software.ligo.org/docs/lalsuite/lal/_window_8c_source.html#l00597][=CreateTukeyREAL8Window=]] module from the =lal= library. +.. +.. #+BEGIN_SRC python +.. def calculate_spectral_correlation(fft_window_len, wtype='hann', window_fraction=None): +.. if wtype == 'hann': +.. window = lal.CreateHannREAL8Window(fft_window_len) +.. elif wtype == 'tukey': +.. window = lal.CreateTukeyREAL8Window(fft_window_len, window_fraction) +.. else: +.. raise ValueError("Can't handle window type %s" % wtype) +.. #+END_SRC +.. +.. Once the window is built, a new frequency plan is created which will help performing a [[http://fourier.eng.hmc.edu/e101/lectures/fourier_transform_d/node1.html][forward transform]] on the data. This is done with the [[http://software.ligo.org/docs/lalsuite/lal/group___real_f_f_t__h.html#gac4413752db2d19cbe48742e922670af4][=CreateForwardREAL8FFTPlan=]] module which takes as argument the total number of points in the real data and the measurement level for plan creation (here 1 stands for measuring the best plan). +.. +.. #+BEGIN_SRC python +.. fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1) +.. #+END_SRC +.. +.. We can finally compute and return the two-point spectral correlation function for the whitened frequency series (=fft_plan=) from the window applied to the original time series using the [[http://software.ligo.org/docs/lalsuite/lal/group___time_freq_f_f_t__h.html#ga2bd5c4258eff57cc80103d2ed489e076][=REAL8WindowTwoPointSpectralCorrelation=]] module. +.. +.. #+BEGIN_SRC python +.. return window, lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan) +.. #+END_SRC +.. +.. ** Create filter bank +.. <> +.. +.. The construction of a filter bank is fairly simple. 
For each channel, a frequency domain channel filter function will be created using the [[http://software.ligo.org/docs/lalsuite/lalburst/group___e_p_search__h.html#ga899990cbd45111ba907772650c265ec9][=CreateExcessPowerFilter=]] module from the =lalburst= package. Each channel filter is divided by the square root of the PSD frequency series prior to normalization, which has the effect of de-emphasizing frequency bins with high noise content, and is called "over whitening". The data and metadata are finally stored in the =filter_fseries= and =filter_bank= arrays respectively. Finally, we store on a final array, called =np_filters= the all time-series generated from each filter so that we can plot them afterwards +.. +.. #+BEGIN_SRC python +.. def create_filter_bank(delta_f, flow, band, nchan, psd, spec_corr): +.. lal_psd = psd.lal() +.. lal_filters, np_filters = [],[] +.. for i in range(nchan): +.. lal_filter = lalburst.CreateExcessPowerFilter(flow + i*band, band, lal_psd, spec_corr) +.. np_filters.append(Spectrum.from_lal(lal_filter)) +.. lal_filters.append(lal_filter) +.. return filter_fseries, lal_filters, np_filters +.. #+END_SRC +.. +.. ** Compute filter inner products with themselves +.. <> +.. #+BEGIN_SRC python +.. def compute_filter_ips_self(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of inner products of input filters with themselves. If psd +.. argument is given, the unwhitened filter inner products will be returned. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, psd) for f in lal_filters]) +.. #+END_SRC +.. +.. ** Compute filter inner products with adjecant filters +.. <> +.. +.. #+BEGIN_SRC python +.. def compute_filter_ips_adjacent(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of filter inner products between input adjacent filters. +.. If psd argument is given, the unwhitened filter inner products will be +.. returned. The returned array index is the inner product between the +.. 
lal_filter of the same index, and its (array) adjacent filter --- assumed +.. to be the frequency adjacent filter. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, psd) for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])]) +.. #+END_SRC +.. +.. .. _compute_channel_renomalization: +.. +.. Compute channel renormalization +.. ------------------------------- +.. +.. Compute the renormalization for the base filters up to a given bandwidth. +.. +.. #+BEGIN_SRC python +.. def compute_channel_renomalization(nc_sum, lal_filters, spec_corr, nchans, verbose=True): +.. mu_sq = (nc_sum+1)*numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None) for f in lal_filters]) +.. # Uncomment to get all possible frequency renormalizations +.. #for n in xrange(nc_sum, nchans): # channel position index +.. for n in xrange(nc_sum, nchans, nc_sum+1): # channel position index +.. for k in xrange(0, nc_sum): # channel sum index +.. # FIXME: We've precomputed this, so use it instead +.. mu_sq[n] += 2*lalburst.ExcessPowerFilterInnerProduct(lal_filters[n-k], lal_filters[n-1-k], spec_corr, None) +.. #print mu_sq[nc_sum::nc_sum+1] +.. return mu_sq +.. #+END_SRC +.. +.. ** Measure root-sum-square strain (hrss) +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, w_ss_ij, delta_f, delta_t, filter_len, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. +.. z_j_b - time frequency map block which the constructed tile covers +.. uw_ss_ii - unwhitened filter inner products +.. uw_ss_ij - unwhitened adjacent filter inner products +.. w_ss_ij - whitened adjacent filter inner products +.. delta_f - frequency binning of EP filters +.. delta_t - native time resolution of the time frequency map +.. filter_len - number of samples in a fitler +.. dof - degrees of freedom in the tile (twice the time-frequency area) +.. """ +.. 
s_j_b_avg = uw_ss_ii * delta_f / 2 +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= delta_f +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \ +.. numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. # eqn. 63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / filter_len * 2) +.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt(numpy.sum(numpy.absolute(s_j_nb)**2) * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Unwhitened inner products filtering +.. <> +.. +.. #+BEGIN_SRC python +.. # < s^2_j(f_1, b) > = 1 / 2 / N * \delta_t EPIP{\Theta, \Theta; P} +.. def uw_sum_sq(filter1, filter2, spec_corr, psd): +.. return lalburst.ExcessPowerFilterInnerProduct(filter1, filter2, spec_corr, psd) +.. #+END_SRC +.. +.. ** Unwhitened sum of squares signal +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_slowly(z_j_b, lal_filters, spec_corr, psd, delta_t, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. NOTE: This function is deprecated +.. in favor of measure_hrss, since it requires recomputation of many inner products, +.. making it particularly slow. +.. """ +.. # FIXME: Make sure you sum in time correctly +.. # Number of finest bands in given tile +.. nb = len(z_j_b) +.. # eqn. 56 -- unwhitened mean square of filter with itself +.. uw_ss_ii = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i], spec_corr, psd) for i in range(nb)]) +.. s_j_b_avg = uw_ss_ii * lal_filters[0].deltaF / 2 +.. 
# eqn. 57 -- unwhitened mean square of filter with adjacent filter +.. uw_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, psd) for i in range(nb-1)]) +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= lal_filters[0].deltaF +.. +.. # eqn. 61 +.. w_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, None) for i in range(nb-1)]) +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / len(lal_filters[0].data.data) * \ +.. (numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij).sum() +.. +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. +.. # eqn. 63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / len(lal_filters[0].data.data) * 2) +.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt((numpy.absolute(s_j_nb)**2).sum() * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Measure root-mean square strain poorly +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_poorly(tile_energy, sub_psd): +.. return math.sqrt(tile_energy / numpy.average(1.0 / sub_psd) / 2) +.. #+END_SRC +.. +.. ** List triggers from map +.. <> +.. +.. #+BEGIN_SRC python +.. def trigger_list_from_map(tfmap, event_list, threshold, start_time, start_freq, duration, band, df, dt, psd=None): +.. +.. # FIXME: If we don't convert this the calculation takes forever --- but we should convert it once and handle deltaF better later +.. if psd is not None: +.. npy_psd = psd.numpy() +.. +.. start_time = LIGOTimeGPS(float(start_time)) +.. ndof = 2 * duration * band +.. +.. spanf, spant = tfmap.shape[0] * df, tfmap.shape[1] * dt +.. 
print "Processing %.2fx%.2f time-frequency map." % (spant, spanf) +.. +.. for i, j in zip(*numpy.where(tfmap > threshold)): +.. event = event_list.RowType() +.. +.. # The points are summed forward in time and thus a `summed point' is the +.. # sum of the previous N points. If this point is above threshold, it +.. # corresponds to a tile which spans the previous N points. However, th +.. # 0th point (due to the convolution specifier 'valid') is actually +.. # already a duration from the start time. All of this means, the + +.. # duration and the - duration cancels, and the tile 'start' is, by +.. # definition, the start of the time frequency map if j = 0 +.. # FIXME: I think this needs a + dt/2 to center the tile properly +.. event.set_start(start_time + float(j * dt)) +.. event.set_stop(start_time + float(j * dt) + duration) +.. event.set_peak(event.get_start() + duration / 2) +.. event.central_freq = start_freq + i * df + 0.5 * band +.. +.. event.duration = duration +.. event.bandwidth = band +.. event.chisq_dof = ndof +.. +.. event.snr = math.sqrt(tfmap[i,j] / event.chisq_dof - 1) +.. # FIXME: Magic number 0.62 should be determine empircally +.. event.confidence = -lal.LogChisqCCDF(event.snr * 0.62, event.chisq_dof * 0.62) +.. if psd is not None: +.. # NOTE: I think the pycbc PSDs always start at 0 Hz --- check +.. psd_idx_min = int((event.central_freq - event.bandwidth / 2) / psd.delta_f) +.. psd_idx_max = int((event.central_freq + event.bandwidth / 2) / psd.delta_f) +.. +.. # FIXME: heuristically this works better with E - D -- it's all +.. # going away with the better h_rss calculation soon anyway +.. event.amplitude = measure_hrss_poorly(tfmap[i,j] - event.chisq_dof, npy_psd[psd_idx_min:psd_idx_max]) +.. else: +.. event.amplitude = None +.. +.. event.process_id = None +.. event.event_id = event_list.get_next_id() +.. event_list.append(event) +.. #+END_SRC +.. +.. ** Determine output segment +.. <> +.. +.. #+BEGIN_SRC python +.. 
def determine_output_segment(inseg, dt_stride, sample_rate, window_fraction=0.0): +.. """ +.. Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window. +.. +.. If window_fration is set to 0 (default), assume no windowing. +.. """ +.. # Amount to overlap successive blocks so as not to lose data +.. window_overlap_samples = window_fraction * sample_rate +.. outseg = inseg.contract(window_fraction * dt_stride / 2) +.. +.. # With a given dt_stride, we cannot process the remainder of this data +.. remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction)) +.. # ...so make an accounting of it +.. outseg = segment(outseg[0], outseg[1] - remainder) +.. return outseg +.. #+END_SRC +.. +.. ** Make tiles +.. <> +.. +.. #+BEGIN_SRC python +.. def make_tiles(tf_map, nc_sum, mu_sq): +.. tiles = numpy.zeros(tf_map.shape) +.. sum_filter = numpy.ones(nc_sum+1) +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. for t in xrange(tf_map.shape[1]): +.. # Sum and drop correlate tiles +.. # FIXME: don't drop correlated tiles +.. output = numpy.convolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. #output = fftconvolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. tiles[:len(output),t] = numpy.absolute(output) / math.sqrt(2) +.. return tiles[:len(output)]**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create a time frequency map +.. <> +.. +.. In this function, we create a time frequency map with resolution similar than =tf_map= but rescale by a factor of =nc_sum= + 1. All tiles will be independent up to overlap from the original tiling. The =mu_sq= is applied to the resulting addition to normalize the outputs to be zero-mean unit-variance Gaussian variables (if the input is Gaussian). +.. +.. 
#+BEGIN_SRC python +.. def make_indp_tiles(tf_map, nc_sum, mu_sq): +.. tiles = tf_map.copy() +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. stride = nc_sum + 1 +.. for i in xrange(tiles.shape[0]/stride): +.. numpy.absolute(tiles[stride*i:stride*(i+1)].sum(axis=0), tiles[stride*(i+1)-1]) +.. return tiles[nc_sum::nc_sum+1].real**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create output filename +.. <> +.. +.. #+BEGIN_SRC python +.. def make_filename(ifo, seg, tag="excesspower", ext="xml.gz"): +.. if isinstance(ifo, str): +.. ifostr = ifo +.. else: +.. ifostr = "".join(ifo) +.. st_rnd, end_rnd = int(math.floor(seg[0])), int(math.ceil(seg[1])) +.. dur = end_rnd - st_rnd +.. return "%s-%s-%d-%d.%s" % (ifostr, tag, st_rnd, dur, ext) +.. #+END_SRC + diff --git a/conf.py b/sphinx/conf.py similarity index 100% rename from conf.py rename to sphinx/conf.py diff --git a/sphinx/epower_overview.rst b/sphinx/epower_overview.rst new file mode 100644 index 0000000..8c24321 --- /dev/null +++ b/sphinx/epower_overview.rst @@ -0,0 +1,47 @@ +Excess Power - Overview +======================= + +The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search of Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (200) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. (2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. + +We present below a step-by-step procedure followed during the Excess Power search analysis. 
For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. + +- :ref:`Time domain segmentation and PSD estimate ` + + We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. + +- :ref:`Comb of frequency channels ` + + We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. + +- :ref:`Creating analysing blocks ` + + The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for the PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel.
+ +- :ref:`Creating tiles with different bandwidth ` + + We can now construct tiles with different bandwidth by summing multiple channels together. + +- :ref:`Exploring tiles with different duration ` + + For each given tile bandwidth, one can investigate different tile durations. This can be done by exploring different numbers of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing :math:`d`, one can explore multiple tile durations for different bandwidths. + +- :ref:`Define triggering signal ` + + The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. + +.. figure:: ./img/overview.png + + Overview of the Excess Power method and difference between segments, channels, tiles and blocks. + +Code access +----------- + +.. currentmodule:: gdas.epower + +..
autosummary:: + :toctree: generated/ + + excess_power + + + diff --git a/sphinx/epower_step1_checkfilt.rst b/sphinx/epower_step1_checkfilt.rst new file mode 100644 index 0000000..3e0163f --- /dev/null +++ b/sphinx/epower_step1_checkfilt.rst @@ -0,0 +1,50 @@ +Excess Power - Step 1: Checking filtering settings +================================================== + +The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: + + if args.min_frequency < args.strain_high_pass: + print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) + +In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as signals at higher frequencies cannot be reliably identified.
:: + + if args.max_frequency is None: + args.max_frequency = args.sample_rate / 2.0 + +If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args `) or the number of frequency channels (``--channels`` argument) is not defined but the total spectral band is (``data_band``), one can then determine all the filter settings as follows: :: + + + if args.tile_bandwidth is None and args.channels is None: + # Exit program with error message + exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") + else: + # Define as assert statement that tile maximum frequency larger than its minimum frequency + assert args.max_frequency >= args.min_frequency + # Define spectral band of data + data_band = args.max_frequency - args.min_frequency + # Check if tile bandwidth or channel is defined + if args.tile_bandwidth is not None: + # Define number of possible filter bands + nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 + elif args.channels is not None: + # Define filter bandwidth + band = args.tile_bandwidth = data_band / (args.channels + 1) + assert args.channels > 1 + +The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: + + # Lowest frequency of the first filter + flow = args.min_frequency + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + check_filtering_settings + + + diff --git a/sphinx/epower_step2_psd.rst b/sphinx/epower_step2_psd.rst new file mode 100644 index 0000000..3960f7e --- /dev/null +++ b/sphinx/epower_step2_psd.rst @@ -0,0 +1,54 @@ +..
_psdestimate: + +Excess Power - Step 2: Estimate Power Spectral Density (PSD) +============================================================ + +The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created. The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating-point values. :: + + # Convert time series as array of float + data = ts_data.astype(numpy.float64) + +The PSD is calculated by splitting up the signal into overlapping segments and scanning through each segment to calculate an individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: + + # Average method to measure PSD from the data + avg_method = args.psd_estimation + +One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in units of seconds with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into units of samples. :: + + # The segment length for PSD estimation in samples + seg_len = int(args.psd_segment_length * args.sample_rate) + # The separation between consecutive segments in samples + seg_stride = int(args.psd_segment_stride * args.sample_rate) + +We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library.
What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, and return the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin. :: + + # Lifted from the psd.from_cli module + fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) + # Plot the power spectral density + plot_spectrum(fd_psd) + # We need this for the SWIG functions + lal_psd = fd_psd.lal() + +One can display the power measurements, frequency array and frequency separation between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: + + print 'Display power measurements of the first 10 frequency bins' + print fd_psd[:10] + print 'Display central frequency of the first 10 bins' + print fd_psd.sample_frequencies[:10] + print 'Display the frequency separation between bins' + print fd_psd.delta_f + +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate. + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + calculate_psd + + + diff --git a/sphinx/epower_step3_speccor.rst b/sphinx/epower_step3_speccor.rst new file mode 100644 index 0000000..d9ab49f --- /dev/null +++ b/sphinx/epower_step3_speccor.rst @@ -0,0 +1,25 @@ +Excess Power - Step 3: Two point spectral correlation +===================================================== + +This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e.
this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8, smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end). :: + + window_fraction = 0 + +The two point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: + + # Do two point spectral correlation + window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) + window = window.data.data + window_sigma_sq = numpy.mean(window**2) + # Pre scale the window by its root mean squared -- see eqn 11 of EP document + #window /= numpy.sqrt(window_sigma_sq) + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + calculate_spectral_correlation diff --git a/sphinx/epower_step4_filterbank.rst b/sphinx/epower_step4_filterbank.rst new file mode 100644 index 0000000..57855a6 --- /dev/null +++ b/sphinx/epower_step4_filterbank.rst @@ -0,0 +1,45 @@ +.. _filterbank: + +Excess Power - Step 4: Computing the filter bank +================================================ + +The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in an increment equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM).
:: + + # Define filters + filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) + +This function will return 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``). The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as follows: :: + + # Print data of first channel's filter + print filter_bank[0].data.data + # Print frequency separation between 2 values in the first channel's filter + print filter_bank[0].deltaF + # Print first frequency of the first channel's filter + print filter_bank[0].f0 + # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) + print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF + +Further in the analysis, the following filters will be used: +1. ``white_filter_ip``: Whitened filter inner products computed with themselves. +2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. +3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. +4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. + +:: + + # This is necessary to compute the mu^2 normalizations + white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) + unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) + # These two are needed for the unwhitened mean square sum (hrss) + white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) + unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) + +Code access +----------- + +.. currentmodule:: gdas.epower + +..
autosummary:: + :toctree: generated/ + + create_filter_bank diff --git a/sphinx/epower_step5_normalization.rst b/sphinx/epower_step5_normalization.rst new file mode 100644 index 0000000..66b0f3e --- /dev/null +++ b/sphinx/epower_step5_normalization.rst @@ -0,0 +1,23 @@ +Excess Power - Step 5: Normalization of virtual channel +======================================================= + +The virtual channels will be used during the Excess Power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function. :: + + # Initialise dictionary + mu_sq_dict = {} + # nc_sum additional channel adds + for nc_sum in range(0, int(math.log(nchans, 2))): + min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 + print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) + nc_sum = 2**nc_sum - 1 + mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) + +Code access +----------- + +.. currentmodule:: gdas.epower + +.. autosummary:: + :toctree: generated/ + + compute_channel_renormalization diff --git a/sphinx/epower_step6_initialisation.rst b/sphinx/epower_step6_initialisation.rst new file mode 100644 index 0000000..02e17f4 --- /dev/null +++ b/sphinx/epower_step6_initialisation.rst @@ -0,0 +1,41 @@ +Initialise event list and determine stride boundaries +===================================================== + +First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as ``lsctables``. A pre-defined LSC table can be constructed using the ``New`` function from the `glue.ligolw.lsctables `_ module. We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record.
:: + + # Create event list for single burst table + event_list = lsctables.New(lsctables.SnglBurstTable, + ['start_time','start_time_ns','peak_time','peak_time_ns', + 'duration','bandwidth','central_freq','chisq_dof', + 'confidence','snr','amplitude','channel','ifo', + 'process_id','event_id','search','stop_time','stop_time_ns']) + +We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in sample unit for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in sample unit is determined and added to both the starting and ending time indexes. :: + + # Determine boundaries of stride in time domain + t_idx_min, t_idx_max = 0, seg_len + # Check if user requested starting time is defined + if args.analysis_start_time is not None: + # Define the time difference in seconds between data and user requested starting times + t_idx_off = args.analysis_start_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_off = int(t_idx_off * args.sample_rate) + else: + # Define index of the starting point as first value in data + t_idx_off = 0 + # Initialise minimum index values as offset starting index + t_idx_min += t_idx_off + # Initialise maximum index values as offset starting index + t_idx_max += t_idx_off + +Finally, the index for the ending time after all the segments have been analysed can be estimated from the user-defined parameter or is defined as the length of the time series data ``ts_data``.
:: + + # Check if user requested end time is defined + if args.analysis_end_time is not None: + # Define the time difference between data and user requested ending times + t_idx_max_off = args.analysis_end_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_max_off = int(t_idx_max_off * args.sample_rate) + else: + # Define index of the ending point as the length of data array + t_idx_max_off = len(ts_data) diff --git a/sphinx/example.rst b/sphinx/example.rst new file mode 100644 index 0000000..6677764 --- /dev/null +++ b/sphinx/example.rst @@ -0,0 +1,41 @@ +Working Example +=============== + +Either on your own computer or on the server, in a Jupyter notebook or in a Python script, the first thing to do is to import the ``gdas`` package that contains all the modules present in the GNOME software. That can be done easily by doing the following:: + + import gdas + +In order to retrieve a specific chunk of data to be analyzed for a particular station, the name of the station along with the start and end dates should be specified:: + + station = 'fribourg01' + start_time = '2016-11-03-04' + end_time = '2016-11-03-04-2' + +where the start and end times should always have at least the year, month and day specified, and with the values separated by a dash symbol. Hour and minute can also be specified. + +If you are not working on the server and the data are located in a different repository than ``/GNOMEDrive/gnome/serverdata/``, a custom path can be defined.
For instance:: + + datapath = '/Users/vincent/data/GNOMEDrive/gnome/serverdata/' + +The magnetic field data can then be retrieved as follows:: + + ts_data,ts_list,activity = gdas.magfield(station,start_time,end_time,rep=datapath) + +The ``gdas.magfield`` method will return 3 arrays of data that can then be used to produce different plots:: + + gdas.plot_activity(activity) + gdas.plot_time_series(station,ts_list,seglist=activity) + gdas.plot_asd(station,ts_list) + gdas.plot_whitening(station,ts_list,activity) + +This is a script to do Excess Power analysis:: + + psd_segment_length = 60 + psd_segment_stride = 30 + psd_estimation = 'median-mean' + window_fraction = 0 + tile_fap = 1e-5 + channels = 250 + + gdas.excess_power(ts_data,psd_segment_length,psd_segment_stride,psd_estimation,window_fraction,tile_fap,station,nchans=channels) + gdas.plot_triggers() diff --git a/sphinx/excess_power.rst b/sphinx/excess_power.rst new file mode 100644 index 0000000..f00a33d --- /dev/null +++ b/sphinx/excess_power.rst @@ -0,0 +1,923 @@ +Excess-Power algorithm +====================== + +General overview +---------------- + +The **Excess Power method** is known as the *optimal detection strategy* to search for burst signals for which only the duration and frequency band are known, which is basically the case for GNOME and its search for Axion-Like Particles (ALP). This method was developed and introduced by `Anderson et al. (2001) `_ and has been extensively used in the detection of burst sources of gravitational radiation. A more technical documentation was written by `Brady et al. (2007) `_ describing how the algorithm used by the LIGO collaboration works and how the theory is translated into code. + +We present below a step-by-step procedure followed during the Excess Power search analysis.
For a better representation of what is happening, the figure at the end shows how the data is being split and analysed to search for multiple signals of different bandwidth and duration in the time-frequency plane. + +- :ref:`Time domain segmentation and PSD estimate ` + + We first estimate the instrument's noise Power Spectral Density (PSD) by splitting the time-series data into multiple overlapping segments. A periodogram for each segment is calculated separately and then averaged, which will reduce the variance of the individual power measurements. The result is a frequency series where samples are separated in frequency space by :math:`\Delta f` equal to the inverse of a segment’s length and with a high end frequency limit equal to the Nyquist limit. The final power spectrum will help reveal the existence, or the absence, of repetitive patterns and correlation structures in a signal process. + +- :ref:`Comb of frequency channels ` + + We then split the PSD frequency series into multiple channels. For each channel, a frequency domain filter is created with a :math:`\Delta f` determined by the PSD and a total extent in Fourier space that is twice the stated bandwidth of a channel. The result is a list of each channel filter's frequency series. + +- :ref:`Creating analysing blocks ` + + The Excess Power method can lead to moderately-large computational requirements, and it has been found that the computational efficiency of this implementation can be improved upon by considering blocks of data that are much longer than the longest signal time duration. The entire time series is therefore split into separate blocks. We use the length of the segments used for the PSD estimate to define the duration of each block. For each block, the time series is converted into a frequency series which is then filtered by the filter bank throughout all the channels. A time-frequency map is finally created which stores all the filtered frequency series from each channel.
+ +- :ref:`Creating tiles with different bandwidth ` + + We can now construct tiles with different bandwidth by summing multiple channels together. + +- :ref:`Exploring tiles with different duration ` + + For each given tile bandwidth, one can investigate different tile durations. This can be done by exploring different numbers of degrees of freedom, :math:`d`, which can be calculated as follows: :math:`d=2BT` where :math:`B` and :math:`T` are respectively the bandwidth and duration of the tile. Section 2.2.5 of `Brady et al. `_ gives a great description of how to interpret the number of degrees of freedom. Therefore, by changing :math:`d`, one can explore multiple tile durations for different bandwidths. + +- :ref:`Define triggering signal ` + + The energy of each tile in the time-frequency space is calculated and compared to a user-defined threshold value. After defining a tile false alarm probability threshold in Gaussian noise and using the number of degrees of freedom for each tile, one can define an energy threshold value above which a burst trigger can be identified by comparing the energy threshold with the tile's energy in the time-frequency map. A tile energy time frequency map plot similar to Figure 5 in `Pustelny et al. (2013) `_ can then be made which plots the outlying tile energies present in the data. + +.. figure:: ./img/overview.png + + Overview of the Excess Power method and difference between segments, channels, tiles and blocks. + +.. _psdestimate: + +Estimate Power Spectral Density (PSD) +------------------------------------- + +The instrument's noise Power Spectral Density (PSD) will be used to whiten the data and help reveal the existence, or the absence, of repetitive patterns and correlation structures in the signal process. It will also determine the total bandwidth spanned by each of the filters that will subsequently be created.
The first thing to do before calculating the PSD is to ensure that the time series data is converted into an array of floating-point values. :: + + # Convert time series as array of float + data = ts_data.astype(numpy.float64) + +The PSD is calculated by splitting up the signal into overlapping segments and scanning through each segment to calculate an individual periodogram. The periodograms from each segment are then averaged, reducing the variance of the individual power measurements. In order to proceed, we need to define the average method, ``avg_method``, that will be used to measure the PSD from the data. This can be specified with the ``--psd-estimation`` option. :: + + # Average method to measure PSD from the data + avg_method = args.psd_estimation + +One also needs to specify the length of each segment, ``seg_len``, as well as the separation between 2 consecutive segments, ``seg_stride``. Both parameters can be defined in units of seconds with the ``--psd-segment-length`` and ``--psd-segment-stride`` arguments respectively and can then be converted into units of samples. :: + + # The segment length for PSD estimation in samples + seg_len = int(args.psd_segment_length * args.sample_rate) + # The separation between consecutive segments in samples + seg_stride = int(args.psd_segment_stride * args.sample_rate) + +We then use the `Welch's method `_ to perform the power spectral density estimate using the `welch `_ module from the ``pycbc.psd`` library. What this will do is to compute the discrete Fourier transform for each PSD segment to produce individual periodograms, and then compute the squared magnitude of the result. The individual periodograms are then averaged using the user-defined average method, ``avg_method``, and return the frequency series, ``fd_psd``, which will store the power measurement for each frequency bin.
:: + + # Lifted from the psd.from_cli module + fd_psd = psd.welch(data,avg_method=avg_method,seg_len=seg_len,seg_stride=seg_stride) + # Plot the power spectral density + plot_spectrum(fd_psd) + # We need this for the SWIG functions + lal_psd = fd_psd.lal() + +One can display the power measurements, frequency array and frequency separation between consecutive samples, :math:`\Delta f` in Hertz, by printing the following variables: :: + + print 'Display power measurements of the first 10 frequency bins' + print fd_psd[:10] + print 'Display central frequency of the first 10 bins' + print fd_psd.sample_frequencies[:10] + print 'Display the frequency separation between bins' + print fd_psd.delta_f + +:math:`\Delta f` corresponds to the inverse of a segment's length which is the smallest frequency (i.e. longest period) of detectable signals in each segment. The frequency range spans from 0 to the Nyquist frequency, i.e. half of the sampling rate. + +Checking filtering settings +--------------------------- + +The first thing to check is that the frequency of the high-pass filter (if defined) is below the minimum frequency of the filter bank. Indeed, a high-pass filter will only pass frequencies that are higher than the cutoff frequency (here defined by the ``strain_high_pass`` argument). If the high pass frequency is greater than the minimum frequency in the filter bank, the signal with frequencies lower than the cutoff frequency will get attenuated. :: + + if args.min_frequency < args.strain_high_pass: + print >>sys.stderr, "Warning: strain high pass frequency %f is greater than the tile minimum frequency %f --- this is likely to cause strange output below the bandpass frequency" % (args.strain_high_pass, args.min_frequency) + +In case the maximum frequency in the filter bank is not defined, we set it to be equal to the Nyquist frequency, i.e. half the sampling rate, which makes sense as signals at higher frequencies cannot be reliably identified.
:: + + if args.max_frequency is None: + args.max_frequency = args.sample_rate / 2.0 + +If the bandwidth of the finest filter (``--tile-bandwidth`` argument, see section :ref:`construct_args `) or the number of frequency channels (``--channels`` argument) is not defined but the total spectral band is (``data_band``), one can then determine all the filter settings as follows: :: + + + if args.tile_bandwidth is None and args.channels is None: + # Exit program with error message + exit("Either --tile-bandwidth or --channels must be specified to set up time-frequency plane") + else: + # Define as assert statement that tile maximum frequency larger than its minimum frequency + assert args.max_frequency >= args.min_frequency + # Define spectral band of data + data_band = args.max_frequency - args.min_frequency + # Check if tile bandwidth or channel is defined + if args.tile_bandwidth is not None: + # Define number of possible filter bands + nchans = args.channels = int(data_band / args.tile_bandwidth) - 1 + elif args.channels is not None: + # Define filter bandwidth + band = args.tile_bandwidth = data_band / (args.channels + 1) + assert args.channels > 1 + +The minimum frequency to be explored can be user-defined by using the ``--min-frequency`` option. :: + + # Lowest frequency of the first filter + flow = args.min_frequency + +Whitening window and spectral correlation +----------------------------------------- + +This part determines how much data on either side of the Tukey window is to be discarded. Nominally, this means that one will lose ``window_fraction`` * ``args.psd_segment_length`` to corruption from the window, i.e. this is simply discarded. This is tuned to give an integer offset when used with ``args.psd_segment_length`` equal to 8; smaller windows will have fractions of integers, but larger powers of two will still preserve this (probably not a big deal in the end).
:: + + window_fraction = 0 + +The two-point spectral correlation is then done with the :ref:`calculate_spectral_correlation ` function which will return both the Tukey window applied to the original time series data and the actual two-point spectral correlation function for the whitened frequency series from the applied whitening window. :: + + # Do two point spectral correlation + window, spec_corr = calculate_spectral_correlation(seg_len,'tukey',window_fraction=window_fraction) + window = window.data.data + window_sigma_sq = numpy.mean(window**2) + # Pre scale the window by its root mean squared -- see eqn 11 of EP document + #window /= numpy.sqrt(window_sigma_sq) + +.. _filterbank: + +Computing the filter bank +------------------------- + +The filter bank will create band-pass filters for each channel in the PSD frequency domain. The :ref:`create_filter_bank ` function will scan the bandwidth from the central frequency of the first channel (i.e. flow+band/2) to the final frequency of the last channel (i.e. band*nchans) in increments equal to the frequency band. The filter's total extent in Fourier space is actually twice the stated bandwidth (FWHM). :: + + # Define filters + filter_bank, fdb = create_filter_bank(fd_psd.delta_f, flow+band/2, band, nchans, fd_psd, spec_corr) + +This function returns 2 arrays: the ``filter_bank`` array which is a list of `COMPLEX16FrequencySeries `_ arrays corresponding to each channel's filter, and the ``fdb`` array which provides the time-series from each filter. The length of each array is equal to the total number of channels (i.e. ``nchans``).
The filter's data, :math:`\Delta f` value, and first and last frequencies of any channel's filter can be displayed as follows: :: + + # Print data of first channel's filter + print filter_bank[0].data.data + # Print frequency separation between 2 values in the first channel's filter + print filter_bank[0].deltaF + # Print first frequency of the first channel's filter + print filter_bank[0].f0 + # Print last frequency of the first channel's filter (equal to twice the channel's bandwidth) + print filter_bank[0].f0+(len(filter_bank[0].data.data)-1)*filter_bank[0].deltaF + +Further in the analysis, the following filters will be used: +1. ``white_filter_ip``: Whitened filter inner products computed with themselves. +2. ``unwhite_filter_ip``: Unwhitened filter inner products computed with themselves. +3. ``white_ss_ip``: Whitened filter inner products computed between input adjacent filters. +4. ``unwhite_ss_ip``: Unwhitened filter inner products computed between input adjacent filters. + +:: + + # This is necessary to compute the mu^2 normalizations + white_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, None) + unwhite_filter_ip = compute_filter_ips_self(filter_bank, spec_corr, lal_psd) + # These two are needed for the unwhitened mean square sum (hrss) + white_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, None) + unwhite_ss_ip = compute_filter_ips_adjacent(filter_bank, spec_corr, lal_psd) + +Normalization of virtual channel +-------------------------------- + +The virtual channels will be used during the excess power analysis to explore different frequency ranges around each PSD segment and look for possible triggers. Each channel is renormalized using the :ref:`compute_channel_renomalization ` internal function.
:: + + # Initialise dictionary + mu_sq_dict = {} + # nc_sum additional channel adds + for nc_sum in range(0, int(math.log(nchans, 2))): + min_band = (len(filter_bank[0].data.data)-1) * filter_bank[0].deltaF / 2 + print tprint(t0,t1),"Calculation for %d %d Hz channels" % (nc_sum+1, min_band) + nc_sum = 2**nc_sum - 1 + mu_sq_dict[nc_sum] = compute_channel_renomalization(nc_sum, filter_bank, spec_corr, nchans) + +Initialise event list and determine stride boundaries +----------------------------------------------------- + +First of all, we create a table similar to the one made by the LIGO Scientific Collaboration (LSC) where all the information will be stored. Such a table is commonly known as ``lsctables``. A pre-defined LSC table can be constructed using the ``New`` function from the `glue.ligolw.lsctables `_ module. We use the ``SnglBurstTable`` function for the type of data to be stored and define all the columns we wish to record. :: + + # Create event list for single burst table + event_list = lsctables.New(lsctables.SnglBurstTable, + ['start_time','start_time_ns','peak_time','peak_time_ns', + 'duration','bandwidth','central_freq','chisq_dof', + 'confidence','snr','amplitude','channel','ifo', + 'process_id','event_id','search','stop_time','stop_time_ns']) + +We also need to determine the indexes of both starting and ending times for the first segment to analyse, respectively ``t_idx_min`` and ``t_idx_max``. The default values are considered to be 0 for the starting index and the segment length in samples for the ending time index. Also, if the user defines a different starting time than the one from the loaded data, the offset index in samples is determined and added to both starting and ending time indexes.
:: + + # Determine boundaries of stride in time domain + t_idx_min, t_idx_max = 0, seg_len + # Check if user requested starting time is defined + if args.analysis_start_time is not None: + # Define the time difference in seconds between data and user requested starting times + t_idx_off = args.analysis_start_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_off = int(t_idx_off * args.sample_rate) + else: + # Define index of the starting point as first value in data + t_idx_off = 0 + # Initialise minimum index values as offset starting index + t_idx_min += t_idx_off + # Initialise maximum index values as offset starting index + t_idx_max += t_idx_off + +Finally, the index for the ending time after all the segments have been analysed can be estimated for the user-defined parameter or is defined as the length of the time series data ``ts_data``. :: + + # Check if user requested end time is defined + if args.analysis_end_time is not None: + # Define the time difference between data and user requested ending times + t_idx_max_off = args.analysis_end_time - ts_data.start_time + # Calculate the index of the user requested starting point in the data + t_idx_max_off = int(t_idx_max_off * args.sample_rate) + else: + # Define index of the ending point as the length of data array + t_idx_max_off = len(ts_data) + +.. _analysingblocks: + +Define analysing blocks +----------------------- + +The first thing we do is to calculate the time series for the segment that is covered (``tmp_ts_data``) and redefined the metadata, especially the time of the first sample in seconds which is defined by the ``epoch`` argument and is different for every segment. After plotting the time series for that segment, the data are then converted into frequency series (``fs_data``) using the `to_frequencyseries `_ module from the ``pycbc.types.timeseries.TimeSeries`` library. Finally, the frequency data are then whitened. 
:: + + # Loop over each data within the user requested time period + while t_idx_max <= t_idx_max_off: + # Define starting and ending time of the segment in seconds + start_time = ts_data.start_time + t_idx_min/float(args.sample_rate) + end_time = ts_data.start_time + t_idx_max/float(args.sample_rate) + print tprint(t0,t1),"Analyzing block %i to %i (%.2f percent)"%(start_time,end_time,100*float(t_idx_max)/float(idx_max_off)) + # Model a withen time series for the block + tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max]*window, 1.0/args.sample_rate,epoch=start_time) + # Save time series in segment repository + segfolder = 'segments/%i-%i'%(start_time,end_time) + os.system('mkdir -p '+segfolder) + plot_ts(tmp_ts_data,fname='%s/ts.png'%(segfolder)) + # Convert times series to frequency series + fs_data = tmp_ts_data.to_frequencyseries() + print tprint(t0,t1),"Frequency series data has variance: %s" % fs_data.data.std()**2 + # Whitening (FIXME: Whiten the filters, not the data) + fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f) + print tprint(t0,t1),"Whitened frequency series data has variance: %s" % fs_data.data.std()**2 + +Create time-frequency map for each block +---------------------------------------- + +We initialise a 2D zero array for a time-frequency map (``tf_map``) which will be computed for each frequency-domain filter associated to each PSD segment and where the filtered time-series for each frequency channels will be stored. The number of rows corresponds to the total number of frequency channels which is defined by the ``nchans`` variable. The number of columns corresponds to the segment length in samples (i.e. the number of samples covering one segment) which is defined by the ``seg_len`` variable. 
:: + + # Initialise 2D zero array for time-frequency map + tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128) + +We also initialise a zero vector for a temporary filter bank (``tmp_filter_bank``) that will store, for a given channel, the filter's values from the original filter bank (``filter_bank``) for that channel only. The length of the temporary filter bank is equal to the length of the PSD frequency series (``fd_psd``). :: + + # Initialise 1D zero array + tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128) + +We then loop over all the frequency channels. While in the loop, we first re-initialise the temporary filter bank with zero values everywhere along the frequency series. We then determine the first and last frequency of each channel and re-define the values of the filter in that frequency range based on the values from the original channel's filter from the original filter bank. :: + + # Loop over all the channels + print tprint(t0,t1),"Filtering all %d channels..." % nchans + for i in range(nchans): + # Reset filter bank series + tmp_filter_bank *= 0.0 + # Index of starting frequency + f1 = int(filter_bank[i].f0/fd_psd.delta_f) + # Index of ending frequency + f2 = int((filter_bank[i].f0 + 2*band)/fd_psd.delta_f)+1 + # (FIXME: Why is there a factor of 2 here?) + tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2 + +We then extract the frequency series from the filter bank for that channel, which will be used as a template waveform to filter the actual data from the channel. :: + + # Define the template to filter the frequency series with + template = types.FrequencySeries(tmp_filter_bank, delta_f=fd_psd.delta_f, copy=False) + +Finally, we use the `matched_filter_core `_ module from the ``pycbc.filter.matchedfilter`` library to filter the frequency series from the channel. 
This will return both a time series containing the complex signal-to-noise matched filtered against the data, and a frequency series containing the correlation vector. :: + + # Create filtered series + filtered_series = filter.matched_filter_core(template,fs_data,h_norm=None,psd=None, + low_frequency_cutoff=filter_bank[i].f0, + high_frequency_cutoff=filter_bank[i].f0+2*band) + +The `matched filter `_ is the optimal linear filter for maximizing the signal to noise ratio (SNR) in the presence of additive stochastic noise. The filtered time series is stored in the time-frequency map and can be used to produce a spectrogram of the segment of data being analysed. :: + + # Include filtered series in the map + tf_map[i,:] = filtered_series[0].numpy() + +The time-frequency map is a 2D array with a length that corresponds to the number of channels and a width equal to the number of sample present in one segment of data, i.e. segment's length in seconds times the the sampling rate. The map can finally be plotted with a :math:`\Delta t` corresponding to the sampling period of the original dataset (i.e. inverse of the original sampling rate), and :math:`\Delta f` is equal to the bandwidth of one channel. :: + + plot_spectrogram(numpy.abs(tf_map).T,tmp_ts_data.delta_t,fd_psd.delta_f,ts_data.sample_rate,start_time,end_time,fname='%s/tf.png'%(segfolder)) + +.. _tilebandwidth: + +Constructing tiles of different bandwidth +----------------------------------------- + +First and foremost, we define a clipping region in the data to be used to remove window corruption, this is non-zero if the ``window_fraction`` variable is set to a non-zero value. :: + + print tprint(t0,t1),"Beginning tile construction..." + # Clip the boundaries to remove window corruption + clip_samples = int(args.psd_segment_length * window_fraction * args.sample_rate / 2) + +In order to perform a multi-resolution search, tiles of many different bandwidths and durations will be scanned. 
We first need to setup a loop such that the maximum number of additional channel is equal to the base 2 logarithm of the total number of channels. The number of narrow band channels to be summed (``nc_sum``) would therefore be equal to 2 to the power of the current quantity of additional channels. :: + + for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]: # nc_sum additional channel adds + nc_sum = 2**nc_sum - 1 + print tprint(t0,t1,t2),"Summing %d narrow band channels..." % (nc_sum+1) + +The undersampling rate for this tile can be calculated using the channel frequency band and the number of narrow band channels to be summed such that the bandwidth of the tile is equal to ``band * (nc_sum + 1)``. :: + + us_rate = int(round(1.0 / (2 * band*(nc_sum+1) * ts_data.delta_t))) + print >>sys.stderr, "Undersampling rate for this level: %f" % (args.sample_rate/us_rate) + +"Virtual" wide bandwidth channels are constructed by summing the samples from multiple channels, and correcting for the overlap between adjacent channel filters. We then define the normalised channel at the current level and create a time frequency map for this tile using the :ref:`make_indp_tiles ` internal function. In other word, we are constructing multiple sub-tiles for which we can determined the respective energy in the given frequency band. :: + + mu_sq = mu_sq_dict[nc_sum] + sys.stderr.write("\t...calculating tiles...") + if clip_samples > 0: + tiles = make_indp_tiles(tf_map[:,clip_samples:-clip_samples:us_rate], nc_sum, mu_sq) + else: + tiles = make_indp_tiles(tf_map[:,::us_rate], nc_sum, mu_sq) + sys.stderr.write(" TF-plane is %dx%s samples... " % tiles.shape) + print >>sys.stderr, " done" + print "Tile energy mean: %f, var %f" % (numpy.mean(tiles), numpy.var(tiles)) + +.. _tileduration: + +Explore multiple tile durations +------------------------------- + +Now that we create a tile with a specific bandwidth, we can start exploring different durations for the tile. 
We will start checking if the user manually defined a value for the longest duration tile to compute, which can be done using the ``--max-duration`` argument. If not, the value will be set to 32. :: + + if args.max_duration is not None: + max_dof = 2 * args.max_duration * (band * (nc_sum+1)) + else: + max_dof = 32 + assert max_dof >= 2 + +Since we produce (initially) tiles with 1 degree of freedom, the duration goes as one over twice the bandwidth. :: + + print "\t\t...getting longer durations..." + #for j in [2**l for l in xrange(1, int(math.log(max_dof, 2))+1)]: + for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]: + sys.stderr.write("\t\tSumming DOF = %d ..." % (2*j)) + #tlen = tiles.shape[1] - j + 1 + tlen = tiles.shape[1] - 2*j + 1 + 1 + if tlen <= 0: + print >>sys.stderr, " ...not enough samples." + continue + dof_tiles = numpy.zeros((tiles.shape[0], tlen)) + #:sum_filter = numpy.ones(j) + # FIXME: This is the correct filter for 50% overlap + sum_filter = numpy.array([1,0] * (j-1) + [1]) + #sum_filter = numpy.array([1,0] * int(math.log(j, 2)-1) + [1]) + for f in range(tiles.shape[0]): + # Sum and drop correlate tiles + # FIXME: don't drop correlated tiles + #output = numpy.convolve(tiles[f,:], sum_filter, 'valid') + dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid') + print >>sys.stderr, " done" + print "Summed tile energy mean: %f, var %f" % (numpy.mean(dof_tiles), numpy.var(dof_tiles)) + level_tdiff = time.time() - tdiff + print >>sys.stderr, "Done with this resolution, total %f" % level_tdiff + +Finally, the bandwidth and duration of the tile can be defined as followed: :: + + # Current bandwidth of the time-frequency map tiles + current_band = band * (nc_sum + 1) + # How much each "step" is in the frequency domain -- almost + # assuredly the fundamental bandwidth + df = current_band + # How much each "step" is in the time domain -- under sampling rate + # FIXME: THis won't work if the sample rate isn't a power of 2 + dt = 1.0 / 2 / (2 * 
current_band) * 2 + full_band = 250 + dt = current_band / full_band * ts_data.sample_rate + dt = 1.0/dt + # Duration is fixed by the NDOF and bandwidth + duration = j / 2.0 / current_band + +.. _triggerfinding: + +Trigger finding +--------------- + +In order to find any trigger in the data, we first need to set a false alarm probability threshold in Gaussian noise above which signal will be distinguished from the noise. Such threshold can be determined by using the /inverse survival function/ method from the `scipy.stats.chi2 `_ package. :: + + threshold = scipy.stats.chi2.isf(args.tile_fap, j) + print "Threshold for this level: %f" % threshold + #if numpy.any(dof_tiles > threshold): + #plot_spectrogram(dof_tiles.T) + #import pdb; pdb.set_trace() + +Once the threshold is set, one can then run the :ref:`trigger_list_from_map ` function to quickly find the trigger signal from the ``dof_tiles`` array that :: + + # Since we clip the data, the start time needs to be adjusted accordingly + window_offset_epoch = fs_data.epoch + args.psd_segment_length * window_fraction / 2 + trigger_list_from_map(dof_tiles, event_list, threshold, window_offset_epoch, filter_bank[0].f0 + band/2, duration, current_band, df, dt, None) + for event in event_list[::-1]: + if event.amplitude != None: + continue + etime_min_idx = float(event.get_start()) - float(fs_data.epoch) + etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t) + etime_max_idx = float(event.get_start()) - float(fs_data.epoch) + event.duration + etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t) + # (band / 2) to account for sin^2 wings from finest filters + flow_idx = int((event.central_freq - event.bandwidth / 2 - (band / 2) - flow) / band) + fhigh_idx = int((event.central_freq + event.bandwidth / 2 + (band / 2) - flow) / band) + # TODO: Check that the undersampling rate is always commensurate + # with the indexing: that is to say that + # mod(etime_min_idx, us_rate) == 0 always + z_j_b = 
tf_map[flow_idx:fhigh_idx,etime_min_idx:etime_max_idx:us_rate] + # FIXME: Deal with negative hrss^2 -- e.g. remove the event + try: + event.amplitude = measure_hrss(z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx], unwhite_ss_ip[flow_idx:fhigh_idx-1], white_ss_ip[flow_idx:fhigh_idx-1], fd_psd.delta_f, tmp_ts_data.delta_t, len(filter_bank[0].data.data), event.chisq_dof) + except ValueError: + event.amplitude = 0 + + print "Total number of events: %d" % len(event_list) + +Switch to new block +------------------- + +The following will move the frequency band to the next segment: :: + + tdiff = time.time() - tdiff + print "Done with this block: total %f" % tdiff + + t_idx_min += int(seg_len * (1 - window_fraction)) + t_idx_max += int(seg_len * (1 - window_fraction)) + +Extracting GPS time range +------------------------- + +We use the `LIGOTimeGPS `_ structure from the =glue.lal= package to /store the starting and ending time in the dataset to nanosecond precision and synchronized to the Global Positioning System time reference/. Once both times are defined, the range of value is stored in a semi-open interval using the `segment `_ module from the =glue.segments= package. 
:: + + # Starting epoch relative to GPS starting epoch + start_time = LIGOTimeGPS(args.analysis_start_time or args.gps_start_time) + # Ending epoch relative to GPS ending epoch + end_time = LIGOTimeGPS(args.analysis_end_time or args.gps_end_time) + # Represent the range of values in the semi-open interval + inseg = segment(start_time,end_time) + +Prepare output file for given time range +---------------------------------------- + +:: + + xmldoc = ligolw.Document() + xmldoc.appendChild(ligolw.LIGO_LW()) + + ifo = args.channel_name.split(":")[0] + proc_row = register_to_xmldoc(xmldoc, __program__, args.__dict__, ifos=[ifo],version=glue.git_version.id, cvs_repository=glue.git_version.branch, cvs_entry_time=glue.git_version.date) + + # Figure out the data we actually analyzed + outseg = determine_output_segment(inseg, args.psd_segment_length, args.sample_rate, window_fraction) + + ss = append_search_summary(xmldoc, proc_row, ifos=(station,), inseg=inseg, outseg=outseg) + + for sb in event_list: + sb.process_id = proc_row.process_id + sb.search = proc_row.program + #sb.ifo, sb.channel = args.channel_name.split(":") + sb.ifo, sb.channel = station, setname + + xmldoc.childNodes[0].appendChild(event_list) + fname = make_filename(station, inseg) + + utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"), verbose=True) + +Plot trigger results +-------------------- + +:: + + events = SnglBurstTable.read(fname+'.gz') + #del events[10000:] + plot = events.plot('time', 'central_freq', "duration", "bandwidth", color="snr") + #plot = events.plot('time', 'central_freq', color='snr') + #plot.set_yscale("log") + plot.set_ylim(1e-0, 250) + t0 = 1153742417 + plot.set_xlim(t0 + 0*60, t0 + 1*60) + #plot.set_xlim(t0 + 28, t0 + 32) + pyplot.axvline(t0 + 30, color='r') + cb = plot.add_colorbar(cmap='viridis') + plot.savefig("triggers.png") + +Module Access +============= + +Extract Magnetic Field Data +--------------------------- + +Extract magnetic field data from HDF5 files. + +.. 
currentmodule:: gdas.retrieve + +.. autosummary:: + :toctree: generated/ + + magfield + file_to_segment + construct_utc_from_metadata + generate_timeseries + create_activity_list + retrieve_data_timeseries + retrieve_channel_data + +Plotting routines +----------------- + +Methods to produce time-frequency plots and others + +.. currentmodule:: gdas.plots + +.. autosummary:: + :toctree: generated/ + + plot_activity + plot_time_series + plot_asd + plot_whitening + plot_ts + plot_spectrum + plot_spectrogram + plot_spectrogram_from_ts + plot_triggers + +Utilities +--------- + +Independent routines to do various other things + +.. currentmodule:: gdas.utils + +.. autosummary:: + :toctree: generated/ + + create_sound + + +.. _file_to_segment: + +.. ** Extract segment information +.. +.. The starting and ending UTC times for a specific HDF5 file are determined by using the =Date=, =t0= and =t1= attributes from the metadata. The [[construct_utc_from_metadata][=construct_utc_from_metadata=]] function is then used to calculate the UTC time. Finally, the [[http://software.ligo.org/docs/glue/glue.__segments.segment-class.html][=segment=]] module from the =glue.segments= library is used to represent the range of times in a semi-open interval. +.. +.. #+BEGIN_SRC python +.. def file_to_segment(hfile,segname): +.. # Extract all atributes from the data +.. attrs = hfile[segname].attrs +.. # Define each attribute +.. dstr, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"] +.. # Construct GPS starting time from data +.. start_utc = construct_utc_from_metadata(dstr, t0) +.. # Construct GPS starting time from data +.. end_utc = construct_utc_from_metadata(dstr, t1) +.. # Represent the range of times in the semi-open interval +.. return segment(start_utc,end_utc) +.. #+END_SRC +.. +.. ** Constructing UTC from metadata +.. <> +.. +.. #+BEGIN_SRC python +.. def construct_utc_from_metadata(datestr, t0str): +.. instr = "%d-%d-%02dT" % tuple(map(int, datestr.split('/'))) +.. 
instr += t0str +.. t = Time(instr, format='isot', scale='utc') +.. return t.gps +.. #+END_SRC +.. +.. ** Generate time series +.. <> +.. +.. #+BEGIN_SRC python +.. def generate_timeseries(data_list, setname="MagneticFields"): +.. full_data = TimeSeriesList() +.. for seg in sorted(data_list): +.. hfile = h5py.File(data_list[seg], "r") +.. full_data.append(retrieve_data_timeseries(hfile, "MagneticFields")) +.. hfile.close() +.. return full_data +.. #+END_SRC +.. +.. ** Retrieve data time series +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_data_timeseries(hfile, setname): +.. dset = hfile[setname] +.. sample_rate = dset.attrs["SamplingRate(Hz)"] +.. gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"]) +.. data = retrieve_channel_data(hfile, setname) +.. ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch) +.. return ts_data +.. #+END_SRC +.. +.. ** Retrieve channel data +.. <> +.. +.. #+BEGIN_SRC python +.. def retrieve_channel_data(hfile, setname): +.. return hfile[setname][:] +.. #+END_SRC +.. +.. .. _calculate_spectral_correlation: +.. +.. ** Two point spectral correlation +.. +.. For our data, we apply a Tukey window whose flat bit corresponds to =window_fraction= (in percentage) of the segment length (in samples) used for PSD estimation (i.e. =fft_window_len=). This can be done by using the [[http://software.ligo.org/docs/lalsuite/lal/_window_8c_source.html#l00597][=CreateTukeyREAL8Window=]] module from the =lal= library. +.. +.. #+BEGIN_SRC python +.. def calculate_spectral_correlation(fft_window_len, wtype='hann', window_fraction=None): +.. if wtype == 'hann': +.. window = lal.CreateHannREAL8Window(fft_window_len) +.. elif wtype == 'tukey': +.. window = lal.CreateTukeyREAL8Window(fft_window_len, window_fraction) +.. else: +.. raise ValueError("Can't handle window type %s" % wtype) +.. #+END_SRC +.. +.. 
Once the window is built, a new frequency plan is created which will help performing a [[http://fourier.eng.hmc.edu/e101/lectures/fourier_transform_d/node1.html][forward transform]] on the data. This is done with the [[http://software.ligo.org/docs/lalsuite/lal/group___real_f_f_t__h.html#gac4413752db2d19cbe48742e922670af4][=CreateForwardREAL8FFTPlan=]] module which takes as argument the total number of points in the real data and the measurement level for plan creation (here 1 stands for measuring the best plan). +.. +.. #+BEGIN_SRC python +.. fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1) +.. #+END_SRC +.. +.. We can finally compute and return the two-point spectral correlation function for the whitened frequency series (=fft_plan=) from the window applied to the original time series using the [[http://software.ligo.org/docs/lalsuite/lal/group___time_freq_f_f_t__h.html#ga2bd5c4258eff57cc80103d2ed489e076][=REAL8WindowTwoPointSpectralCorrelation=]] module. +.. +.. #+BEGIN_SRC python +.. return window, lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan) +.. #+END_SRC +.. +.. ** Create filter bank +.. <> +.. +.. The construction of a filter bank is fairly simple. For each channel, a frequency domain channel filter function will be created using the [[http://software.ligo.org/docs/lalsuite/lalburst/group___e_p_search__h.html#ga899990cbd45111ba907772650c265ec9][=CreateExcessPowerFilter=]] module from the =lalburst= package. Each channel filter is divided by the square root of the PSD frequency series prior to normalization, which has the effect of de-emphasizing frequency bins with high noise content, and is called "over whitening". The data and metadata are finally stored in the =filter_fseries= and =filter_bank= arrays respectively. Finally, we store on a final array, called =np_filters= the all time-series generated from each filter so that we can plot them afterwards +.. +.. #+BEGIN_SRC python +.. 
def create_filter_bank(delta_f, flow, band, nchan, psd, spec_corr): +.. lal_psd = psd.lal() +.. lal_filters, np_filters = [],[] +.. for i in range(nchan): +.. lal_filter = lalburst.CreateExcessPowerFilter(flow + i*band, band, lal_psd, spec_corr) +.. np_filters.append(Spectrum.from_lal(lal_filter)) +.. lal_filters.append(lal_filter) +.. return filter_fseries, lal_filters, np_filters +.. #+END_SRC +.. +.. ** Compute filter inner products with themselves +.. <> +.. #+BEGIN_SRC python +.. def compute_filter_ips_self(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of inner products of input filters with themselves. If psd +.. argument is given, the unwhitened filter inner products will be returned. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, psd) for f in lal_filters]) +.. #+END_SRC +.. +.. ** Compute filter inner products with adjecant filters +.. <> +.. +.. #+BEGIN_SRC python +.. def compute_filter_ips_adjacent(lal_filters, spec_corr, psd=None): +.. """ +.. Compute a set of filter inner products between input adjacent filters. +.. If psd argument is given, the unwhitened filter inner products will be +.. returned. The returned array index is the inner product between the +.. lal_filter of the same index, and its (array) adjacent filter --- assumed +.. to be the frequency adjacent filter. +.. """ +.. return numpy.array([lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, psd) for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])]) +.. #+END_SRC +.. +.. .. _compute_channel_renomalization: +.. +.. Compute channel renormalization +.. ------------------------------- +.. +.. Compute the renormalization for the base filters up to a given bandwidth. +.. +.. #+BEGIN_SRC python +.. def compute_channel_renomalization(nc_sum, lal_filters, spec_corr, nchans, verbose=True): +.. mu_sq = (nc_sum+1)*numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None) for f in lal_filters]) +.. 
# Uncomment to get all possible frequency renormalizations +.. #for n in xrange(nc_sum, nchans): # channel position index +.. for n in xrange(nc_sum, nchans, nc_sum+1): # channel position index +.. for k in xrange(0, nc_sum): # channel sum index +.. # FIXME: We've precomputed this, so use it instead +.. mu_sq[n] += 2*lalburst.ExcessPowerFilterInnerProduct(lal_filters[n-k], lal_filters[n-1-k], spec_corr, None) +.. #print mu_sq[nc_sum::nc_sum+1] +.. return mu_sq +.. #+END_SRC +.. +.. ** Measure root-sum-square strain (hrss) +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss(z_j_b, uw_ss_ii, uw_ss_ij, w_ss_ij, delta_f, delta_t, filter_len, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. +.. z_j_b - time frequency map block which the constructed tile covers +.. uw_ss_ii - unwhitened filter inner products +.. uw_ss_ij - unwhitened adjacent filter inner products +.. w_ss_ij - whitened adjacent filter inner products +.. delta_f - frequency binning of EP filters +.. delta_t - native time resolution of the time frequency map +.. filter_len - number of samples in a fitler +.. dof - degrees of freedom in the tile (twice the time-frequency area) +.. """ +.. s_j_b_avg = uw_ss_ii * delta_f / 2 +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= delta_f +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / filter_len * \ +.. numpy.sum(numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij) +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. # eqn. 63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / filter_len * 2) +.. # eqn. 
64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt(numpy.sum(numpy.absolute(s_j_nb)**2) * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Unwhitened inner products filtering +.. <> +.. +.. #+BEGIN_SRC python +.. # < s^2_j(f_1, b) > = 1 / 2 / N * \delta_t EPIP{\Theta, \Theta; P} +.. def uw_sum_sq(filter1, filter2, spec_corr, psd): +.. return lalburst.ExcessPowerFilterInnerProduct(filter1, filter2, spec_corr, psd) +.. #+END_SRC +.. +.. ** Unwhitened sum of squares signal +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_slowly(z_j_b, lal_filters, spec_corr, psd, delta_t, dof): +.. """ +.. Approximation of unwhitened sum of squares signal energy in a given EP tile. +.. See T1200125 for equation number reference. NOTE: This function is deprecated +.. in favor of measure_hrss, since it requires recomputation of many inner products, +.. making it particularly slow. +.. """ +.. # FIXME: Make sure you sum in time correctly +.. # Number of finest bands in given tile +.. nb = len(z_j_b) +.. # eqn. 56 -- unwhitened mean square of filter with itself +.. uw_ss_ii = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i], spec_corr, psd) for i in range(nb)]) +.. s_j_b_avg = uw_ss_ii * lal_filters[0].deltaF / 2 +.. # eqn. 57 -- unwhitened mean square of filter with adjacent filter +.. uw_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, psd) for i in range(nb-1)]) +.. # unwhitened sum of squares of wide virtual filter +.. s_j_nb_avg = uw_ss_ii.sum() / 2 + uw_ss_ij.sum() +.. s_j_nb_avg *= lal_filters[0].deltaF +.. +.. # eqn. 61 +.. w_ss_ij = numpy.array([uw_sum_sq(lal_filters[i], lal_filters[i+1], spec_corr, None) for i in range(nb-1)]) +.. s_j_nb_denom = s_j_b_avg.sum() + 2 * 2 / len(lal_filters[0].data.data) * \ +.. (numpy.sqrt(s_j_b_avg[:-1] * s_j_b_avg[1:]) * w_ss_ij).sum() +.. +.. # eqn. 62 +.. uw_ups_ratio = s_j_nb_avg / s_j_nb_denom +.. +.. # eqn. 
63 -- approximation of unwhitened signal energy time series +.. # FIXME: The sum in this equation is over nothing, but indexed by frequency +.. # I'll make that assumption here too. +.. s_j_nb = numpy.sum(z_j_b.T * numpy.sqrt(s_j_b_avg), axis=0) +.. s_j_nb *= numpy.sqrt(uw_ups_ratio / len(lal_filters[0].data.data) * 2) +.. # eqn. 64 -- approximate unwhitened signal energy minus noise contribution +.. # FIXME: correct axis of summation? +.. return math.sqrt((numpy.absolute(s_j_nb)**2).sum() * delta_t - s_j_nb_avg * dof * delta_t) +.. #+END_SRC +.. +.. ** Measure root-mean square strain poorly +.. <> +.. +.. #+BEGIN_SRC python +.. def measure_hrss_poorly(tile_energy, sub_psd): +.. return math.sqrt(tile_energy / numpy.average(1.0 / sub_psd) / 2) +.. #+END_SRC +.. +.. ** List triggers from map +.. <> +.. +.. #+BEGIN_SRC python +.. def trigger_list_from_map(tfmap, event_list, threshold, start_time, start_freq, duration, band, df, dt, psd=None): +.. +.. # FIXME: If we don't convert this the calculation takes forever --- but we should convert it once and handle deltaF better later +.. if psd is not None: +.. npy_psd = psd.numpy() +.. +.. start_time = LIGOTimeGPS(float(start_time)) +.. ndof = 2 * duration * band +.. +.. spanf, spant = tfmap.shape[0] * df, tfmap.shape[1] * dt +.. print "Processing %.2fx%.2f time-frequency map." % (spant, spanf) +.. +.. for i, j in zip(*numpy.where(tfmap > threshold)): +.. event = event_list.RowType() +.. +.. # The points are summed forward in time and thus a `summed point' is the +.. # sum of the previous N points. If this point is above threshold, it +.. # corresponds to a tile which spans the previous N points. However, th +.. # 0th point (due to the convolution specifier 'valid') is actually +.. # already a duration from the start time. All of this means, the + +.. # duration and the - duration cancels, and the tile 'start' is, by +.. # definition, the start of the time frequency map if j = 0 +.. 
# FIXME: I think this needs a + dt/2 to center the tile properly +.. event.set_start(start_time + float(j * dt)) +.. event.set_stop(start_time + float(j * dt) + duration) +.. event.set_peak(event.get_start() + duration / 2) +.. event.central_freq = start_freq + i * df + 0.5 * band +.. +.. event.duration = duration +.. event.bandwidth = band +.. event.chisq_dof = ndof +.. +.. event.snr = math.sqrt(tfmap[i,j] / event.chisq_dof - 1) +.. # FIXME: Magic number 0.62 should be determine empircally +.. event.confidence = -lal.LogChisqCCDF(event.snr * 0.62, event.chisq_dof * 0.62) +.. if psd is not None: +.. # NOTE: I think the pycbc PSDs always start at 0 Hz --- check +.. psd_idx_min = int((event.central_freq - event.bandwidth / 2) / psd.delta_f) +.. psd_idx_max = int((event.central_freq + event.bandwidth / 2) / psd.delta_f) +.. +.. # FIXME: heuristically this works better with E - D -- it's all +.. # going away with the better h_rss calculation soon anyway +.. event.amplitude = measure_hrss_poorly(tfmap[i,j] - event.chisq_dof, npy_psd[psd_idx_min:psd_idx_max]) +.. else: +.. event.amplitude = None +.. +.. event.process_id = None +.. event.event_id = event_list.get_next_id() +.. event_list.append(event) +.. #+END_SRC +.. +.. ** Determine output segment +.. <> +.. +.. #+BEGIN_SRC python +.. def determine_output_segment(inseg, dt_stride, sample_rate, window_fraction=0.0): +.. """ +.. Given an input data stretch segment inseg, a data block stride dt_stride, the data sample rate, and an optional window_fraction, return the amount of data that can be processed without corruption effects from the window. +.. +.. If window_fration is set to 0 (default), assume no windowing. +.. """ +.. # Amount to overlap successive blocks so as not to lose data +.. window_overlap_samples = window_fraction * sample_rate +.. outseg = inseg.contract(window_fraction * dt_stride / 2) +.. +.. # With a given dt_stride, we cannot process the remainder of this data +.. 
remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction)) +.. # ...so make an accounting of it +.. outseg = segment(outseg[0], outseg[1] - remainder) +.. return outseg +.. #+END_SRC +.. +.. ** Make tiles +.. <> +.. +.. #+BEGIN_SRC python +.. def make_tiles(tf_map, nc_sum, mu_sq): +.. tiles = numpy.zeros(tf_map.shape) +.. sum_filter = numpy.ones(nc_sum+1) +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. for t in xrange(tf_map.shape[1]): +.. # Sum and drop correlate tiles +.. # FIXME: don't drop correlated tiles +.. output = numpy.convolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. #output = fftconvolve(tf_map[:,t], sum_filter, 'valid')[::nc_sum+1] +.. tiles[:len(output),t] = numpy.absolute(output) / math.sqrt(2) +.. return tiles[:len(output)]**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create a time frequency map +.. <> +.. +.. In this function, we create a time frequency map with resolution similar than =tf_map= but rescale by a factor of =nc_sum= + 1. All tiles will be independent up to overlap from the original tiling. The =mu_sq= is applied to the resulting addition to normalize the outputs to be zero-mean unit-variance Gaussian variables (if the input is Gaussian). +.. +.. #+BEGIN_SRC python +.. def make_indp_tiles(tf_map, nc_sum, mu_sq): +.. tiles = tf_map.copy() +.. # Here's the deal: we're going to keep only the valid output and +.. # it's *always* going to exist in the lowest available indices +.. stride = nc_sum + 1 +.. for i in xrange(tiles.shape[0]/stride): +.. numpy.absolute(tiles[stride*i:stride*(i+1)].sum(axis=0), tiles[stride*(i+1)-1]) +.. return tiles[nc_sum::nc_sum+1].real**2 / mu_sq[nc_sum::nc_sum+1].reshape(-1, 1) +.. #+END_SRC +.. +.. ** Create output filename +.. <> +.. +.. #+BEGIN_SRC python +.. def make_filename(ifo, seg, tag="excesspower", ext="xml.gz"): +.. if isinstance(ifo, str): +.. ifostr = ifo +.. 
else: +.. ifostr = "".join(ifo) +.. st_rnd, end_rnd = int(math.floor(seg[0])), int(math.ceil(seg[1])) +.. dur = end_rnd - st_rnd +.. return "%s-%s-%d-%d.%s" % (ifostr, tag, st_rnd, dur, ext) +.. #+END_SRC + diff --git a/sphinx/gdas b/sphinx/gdas new file mode 120000 index 0000000..1b3efa1 --- /dev/null +++ b/sphinx/gdas @@ -0,0 +1 @@ +../gdas/ \ No newline at end of file diff --git a/img/jupyter1.png b/sphinx/img/jupyter1.png similarity index 100% rename from img/jupyter1.png rename to sphinx/img/jupyter1.png diff --git a/img/jupyter2.png b/sphinx/img/jupyter2.png similarity index 100% rename from img/jupyter2.png rename to sphinx/img/jupyter2.png diff --git a/img/overview.key b/sphinx/img/overview.key similarity index 100% rename from img/overview.key rename to sphinx/img/overview.key diff --git a/img/overview.pdf b/sphinx/img/overview.pdf similarity index 100% rename from img/overview.pdf rename to sphinx/img/overview.pdf diff --git a/img/overview.png b/sphinx/img/overview.png similarity index 100% rename from img/overview.png rename to sphinx/img/overview.png diff --git a/sphinx/index.rst b/sphinx/index.rst new file mode 100644 index 0000000..2225576 --- /dev/null +++ b/sphinx/index.rst @@ -0,0 +1,42 @@ +Welcome! +======== + +This package contains functions useful for magnetic field signal processing, with a focus on Excess Power search analysis and its application to the data of the GNOME collaboration (see `Pustelny et al. (2013) `_). This documentation details all the functions and tasks available through this software. Here are some example tasks that can (or soon will) be handled: + +* Plot the usual time series and spectrogram of magnetic field data. +* Perform excess power analysis and plot detected triggers in a time-frequency map. +* Create artificial data for testing data analysis. +* Inject fake signals of different bandwidths and durations. +* Cross-correlate continuous sine wave signals. +* Compute the Allan standard deviation. 
+ +Should you have any questions or suggested corrections, do not hesitate to `contact me `_. + +.. raw:: html + + Fork me on GitHub + +Getting Started +--------------- + +.. toctree:: + :maxdepth: 2 + + installation + server + example + +Excess Power Search Analysis +---------------------------- + +.. toctree:: + :maxdepth: 1 + + epower_overview + epower_step1_checkfilt + epower_step2_psd + epower_step3_speccor + epower_step4_filterbank + epower_step5_normalization + epower_step6_initialization + diff --git a/sphinx/installation.rst b/sphinx/installation.rst new file mode 100644 index 0000000..cb5ed4e --- /dev/null +++ b/sphinx/installation.rst @@ -0,0 +1,57 @@ +Installation +============ + +The program requires the following general packages to run: `Numpy `_, `Matplotlib `_, `Scipy `_ and `Astropy `_. The following LIGO-related packages are also required for full functionality: `Gwpy `_, `PyCBC `_, `Glue `_, `LAL `_, `LALburst `_ and `LALsimulation `_. + +While most of the packages can be installed automatically using `pip `_, some LIGO packages (Glue, LAL, LALburst and LALsimulation) must be installed separately beforehand as they contain several C routines that need specific compilation. However, these packages are already included in a bigger package called `LALsuite `_ which can be installed fairly easily on Debian (Linux) and Mac OS machines. + +LALsuite tools +-------------- + +Some useful pages on how to download and install the LIGO software can be found `here `_. + +MacPorts (Mac) +~~~~~~~~~~~~~~ + +For Mac users, the installation is pretty easy; detailed information can be found on `this page `_. You need to have `MacPorts `_ installed. The following commands should suffice to install the LALsuite package on your machine:: + + sudo port install lscsoft-deps + sudo port install glue + sudo port install lalapps + +The first command will install all the dependencies needed for the LIGO software to be installed. 
The remaining two commands will install the actual packages. + +apt-get (Debian) +~~~~~~~~~~~~~~~~ + +Since the LIGO software is not a default package in the apt package manager system on Debian machines, additional steps will be needed. The first step is to add the following links to the source list located at ``/etc/apt/sources.list``:: + + deb [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + deb-src [arch=amd64] http://software.ligo.org/lscsoft/debian jessie contrib + +Note that the ``[arch=amd64]`` is needed to fix the architecture problem in case it tries to install the i386 version on 64-bit Debian. Once the sources have been added, you must first install all the dependencies as follows:: + + apt-get install build-essential automake autoconf libtool devscripts + +The LIGO software can finally be installed using the following command:: + + apt-get install lscsoft-all + +Main Program +------------ + +The best way to install the GNOME software along with the rest of the dependencies is by using ``pip``:: + + pip install gdas + +(You may need to put a ``sudo`` in front of this). For this to work +you need to have `pip +`_ installed. This +method allows for easy uninstallation. + +You can also simply download the tarball from the PyPI website, unpack it and then do:: + + python setup.py install + +The latest stable package can be downloaded from PyPI: https://pypi.python.org/pypi/gdas. +The development version can be downloaded from `here `_. diff --git a/sphinx/server.rst b/sphinx/server.rst new file mode 100644 index 0000000..b8b5172 --- /dev/null +++ b/sphinx/server.rst @@ -0,0 +1,20 @@ +Multi-user Server +================= + +A GNOME JupyterHub, or multi-user server, has been created to allow each member to access the entire available dataset. Members who do not have access to the server but wish to access it should send a request to Dr. Sam Afach. 
Users who are not part of the GNOME collaboration will not be granted access to the dataset but are free to use our software on their own data. + +The server can be accessed in two ways, either by accessing the `server's webpage `_, or from your terminal through SSH:: + + ssh -X username@budker.uni-mainz.de -p 8022 + +While SSH is very handy for people using UNIX-like operating systems, this can become more complicated for those working on Windows machines. Fortunately, access to a terminal is also possible through the webpage, which means directly from your internet browser! This can be done by clicking on the New tab after login and selecting Terminal: + +.. figure:: img/jupyter1.png + :width: 70% + :align: center + +You can then use the terminal window to access files and create new Python scripts for your analysis. + +.. figure:: img/jupyter2.png + :width: 70% + :align: center