generalize data reader using rsciio

CEA-MetroCarac · Aug 5, 2024 · 97ef0fe · 97ef0fe
1 parent 940084e
commit 97ef0fe
Show file tree

Hide file tree

Showing 5 changed files with 155 additions and 27 deletions.
diff --git a/fitspy/app/callbacks.py b/fitspy/app/callbacks.py
@@ -15,7 +15,7 @@
 from fitspy.spectra import Spectra
 from fitspy.spectra_map import SpectraMap
 from fitspy.spectrum import Spectrum
-from fitspy.utils import closest_index, check_or_rename
+from fitspy.utils import get_dim, closest_index, check_or_rename
 from fitspy.utils import load_models_from_txt, load_models_from_py
 from fitspy import CMAP
 
@@ -917,20 +917,25 @@ def add_items(self, fnames=None):
         for fname in self.fileselector.filenames:
             if fname not in self.spectra.fnames:
 
-                # 2D-map detection
-                if os.path.isfile(fname):
-                    with open(fname, 'r') as fid:
-                        if fid.readline()[0] == "\t":
-                            self.create_map(fname)
-                            return
+                dim = get_dim(fname)
 
-                if fname_first_item is None:
-                    fname_first_item = fname
+                if dim is None:
+                    msg = "The file {} can not be interpreted by fitspy"
+                    showerror(message=msg.format(Path(fname).name))
+                    return
 
-                spectrum = Spectrum()
-                spectrum.load_profile(fname)
-                spectrum.attractors_params = attractors_params
-                self.spectra.append(spectrum)
+                elif dim == 2:
+                    self.create_map(fname)
+                    return
+
+                else:  # dim == 1
+                    if fname_first_item is None:
+                        fname_first_item = fname
+
+                    spectrum = Spectrum()
+                    spectrum.load_profile(fname)
+                    spectrum.attractors_params = attractors_params
+                    self.spectra.append(spectrum)
 
         self.update(fname=fname_first_item or self.fileselector.filenames[0])
 

diff --git a/fitspy/app/utils.py b/fitspy/app/utils.py
@@ -291,9 +291,7 @@ def select_files(self, filenames=None):
         """ Add items from selected files """
 
         if filenames is None:
-            filetypes = (('', '*.txt'), ('All files', '*.*'))
-            filenames = fd.askopenfilenames(title='Select file(s)',
-                                            filetypes=filetypes)
+            filenames = fd.askopenfilenames(title='Select file(s)')
         self.add_items(filenames=filenames)
 
         self.lbox.event_generate('<<ListboxAdd>>')

diff --git a/fitspy/spectra_map.py b/fitspy/spectra_map.py
@@ -7,7 +7,7 @@
 from matplotlib.widgets import RangeSlider
 from parse import Parser
 
-from fitspy.utils import closest_index
+from fitspy.utils import closest_index, get_2d_map
 from fitspy.spectra import Spectra
 from fitspy.spectrum import Spectrum
 
@@ -70,11 +70,9 @@ def __init__(self):
         self.xrange = None
 
     def create_map(self, fname):
-        """ Create map from .txt file issued from labspec files conversion """
+        """ Create map """
 
-        self.fname = fname
-        dfr = pd.read_csv(fname, sep='\t', header=None)
-        arr = dfr.to_numpy()
+        arr = get_2d_map(fname)
 
         x_map = x = list(np.sort(np.unique(arr[1:, 1])))
         y_map = y = list(np.sort(np.unique(arr[1:, 0])))
@@ -112,6 +110,7 @@ def create_map(self, fname):
             self.append(spectrum)
             coords.append([vals[1], vals[0]])
 
+        self.fname = fname
         self.xy_map = (x_map, y_map)
         self.shape_map = (len(self.xy_map[1]), len(self.xy_map[0]))
         self.extent = [xmin, xmax, ymin, ymax]

diff --git a/fitspy/spectrum.py b/fitspy/spectrum.py
@@ -8,7 +8,6 @@
 from copy import deepcopy
 import warnings
 import numpy as np
-import pandas as pd
 from scipy.signal import find_peaks
 from scipy.interpolate import interp1d
 from scipy.ndimage import uniform_filter1d
@@ -17,6 +16,7 @@
 from lmfit.models import ConstantModel, LinearModel, ParabolicModel, \
     ExponentialModel, ExpressionModel  # pylint:disable=unused-import
 
+from fitspy.utils import get_1d_profile
 from fitspy.utils import closest_index, fileparts, check_or_rename
 from fitspy.utils import save_to_json, load_from_json
 from fitspy.baseline import BaseLine
@@ -245,17 +245,15 @@ def load_profile(self, fname, xmin=None, xmax=None):
 
         # raw profile loading
         if self.x0 is None:
-            self.fname = fname
-            dfr = pd.read_csv(self.fname, sep=r'\s+|\t|,|;| ', engine='python',
-                              skiprows=1, usecols=[0, 1], names=['x0', 'y0'])
-            x0 = dfr['x0'].to_numpy()
-            y0 = dfr['y0'].to_numpy()
+            x0, y0 = get_1d_profile(fname)
 
             # reordering
             inds = np.argsort(x0)
             self.x0 = x0[inds]
             self.y0 = y0[inds]
 
+            self.fname = fname
+
         # (re)initialization or cropping
         if self.range_min is None:
             self.range_min = self.x0.min()

diff --git a/fitspy/utils.py b/fitspy/utils.py
@@ -5,9 +5,13 @@
 import re
 import json
 from pathlib import Path
+import importlib
+import itertools
 import runpy
 import numpy as np
+import pandas as pd
 from lmfit.models import ExpressionModel
+from rsciio import IO_PLUGINS
 
 
 def closest_item(element_list, value):
@@ -149,3 +153,127 @@ def load_models_from_py(fname):
     """ Load models from '.py' file (See the documentation for more details) """
     if Path(fname).exists():
         runpy.run_path(fname)
+
+
+# def converter(fname, fname_res=None):
+#     """ Convert input data using HyperSpy from different formats (.spx, .emd,
+#         .dm3, ...) to .txt fitspy compatible format """
+#
+#     if fname_res is None:
+#         fname_res = Path(fname).with_suffix(".txt")
+#
+#     try:
+#         import hyperspy.api as hs
+#     except ImportError:
+#         raise ImportError('hyperspy must be installed')
+#
+#     signal = hs.load(fname)
+#     support = signal.axes_manager[-1].axis
+#
+#     # 1D spectrum
+#     if signal.data.ndim == 1:
+#         with open(fname_res, mode='w') as fid:
+#             fid.write('#support\t#intensity')
+#             for x, intensity in zip(support, signal.data):
+#                 fid.write(f"\n{x}\t{intensity}")
+#
+#     # 2D map
+#     elif signal.data.ndim == 3:
+#         with open(fname_res, mode='w') as fid:
+#             fid.write("\t\t" + "\t".join(map(str, support)))
+#             for i in range(signal.data.shape[0]):
+#                 for j in range(signal.data.shape[1]):
+#                     intens = signal.data[i, j, :]
+#                     fid.write(f"\n{i}\t{j}\t" + "\t".join(map(str, intens)))
+#
+#     else:
+#         raise NotImplementedError
+
+
+def get_dim(fname):
+    """ Return the dimension (1, 2 or None) of the spectrum/spectra field
+
+    Parameters
+    ----------
+    fname: str or Path
+        Pathname of the file to inspect
+
+    Returns
+    -------
+    dim: int or None
+        2 for a 2D-map, 1 for a single spectrum, None when the file is empty
+        or when its content cannot be interpreted
+    """
+
+    dim = None
+
+    # case-insensitive matching ('.TXT', '.Csv', ...), consistently with the
+    # lowercase extension used to select the rsciio reader
+    if Path(fname).suffix.lower() in ['.txt', '.csv']:
+        with open(fname, 'r') as fid:
+            first_line = fid.readline()
+
+        # an empty file has no first character to test: 'dim' stays to None
+        if first_line:
+            # a leading tabulation identifies a 2D-map file
+            dim = 2 if first_line.startswith("\t") else 1
+
+    else:
+        reader = get_reader_from_rsciio(fname)
+        if reader is not None:
+            # only the first item returned by the reader is considered
+            data = reader.file_reader(fname)[0]['data']
+            if data.ndim == 1:
+                dim = 1
+            elif data.ndim == 3:  # 2D-map
+                dim = 2
+
+    return dim
+
+
+def get_reader_from_rsciio(fname):
+    """ Return the Rosettasciio reader module associated with the 'fname'
+        extension, or None if zero or several plugins declare it """
+    # extension without its leading dot, in lowercase
+    extension = Path(fname).suffix[1:].lower()
+    plugins = [plugin for plugin in IO_PLUGINS
+               if extension in plugin["file_extensions"]]
+    # unknown (no plugin) or ambiguous (several plugins) extension
+    if len(plugins) != 1:
+        return None
+    return importlib.import_module(plugins[0]["api"])
+
+
+def get_x_data_from_rsciio(fname):
+    """ Read 'fname' with the Rosettasciio library and return the support
+        ('x') rebuilt from the first axis and the related intensities ('data')
+
+        Raise NotImplementedError if no reader is available for 'fname' """
+
+    rsciio_reader = get_reader_from_rsciio(fname)
+    if rsciio_reader is None:
+        raise NotImplementedError(f"unreadable file {fname}")
+
+    # only the first item returned by the reader is considered
+    content = rsciio_reader.file_reader(fname)[0]
+    data = content['data']
+
+    # regularly spaced support rebuilt from the first axis description
+    # NOTE(review): assumes axes[0] is the spectral axis -- to be confirmed
+    ax0 = content['axes'][0]
+    x = ax0['offset'] + ax0['scale'] * np.arange(ax0['size'])
+
+    return x, data
+
+
+def get_1d_profile(fname):
+    """ Return the spectrum support ('x0') and its intensity ('y0')
+
+    '.txt' and '.csv' files (case-insensitive extension) are parsed with
+    pandas: the first line is skipped and the first two columns are returned
+    as 'x0' and 'y0'. Any other file is read with the Rosettasciio library.
+
+    Raise IOError if the data read with Rosettasciio are not 1D
+    """
+
+    # case-insensitive matching ('.TXT', '.Csv', ...), consistently with the
+    # lowercase extension used to select the rsciio reader
+    if Path(fname).suffix.lower() in ['.txt', '.csv']:
+        dfr = pd.read_csv(fname,
+                          sep=r'\s+|\t|,|;| ', engine='python',
+                          skiprows=1, usecols=[0, 1],
+                          names=['x0', 'y0'])
+        return dfr['x0'].to_numpy(), dfr['y0'].to_numpy()
+
+    x0, y0 = get_x_data_from_rsciio(fname)
+    if y0.ndim != 1:
+        raise IOError(f"incorrect dimension associated with {fname}")
+
+    return x0, y0
+
+
+def get_2d_map(fname):
+    """ Return the array related to a 2D-map.
+        For more details about the array shape, see:
+        https://cea-metrocarac.github.io/fitspy/doc/user_guide/input_data.html#d-map-spectra
+
+        '.txt' and '.csv' files (case-insensitive extension) are read as
+        tab-separated values without header. Any other file is read with the
+        Rosettasciio library and must provide 3D data.
+
+        Raise IOError if the data read with Rosettasciio are not 3D
+        """
+    # case-insensitive matching ('.TXT', '.Csv', ...), consistently with the
+    # lowercase extension used to select the rsciio reader
+    if Path(fname).suffix.lower() in ['.txt', '.csv']:
+        dfr = pd.read_csv(fname, sep='\t', header=None)
+        return dfr.to_numpy()
+
+    x, data = get_x_data_from_rsciio(fname)
+    if data.ndim != 3:
+        raise IOError(f"incorrect dimension associated with {fname}")
+
+    # (i, j) index pairs enumerated over the last two dimensions of 'data'
+    shape = data.shape
+    inds_i, inds_j = range(shape[1]), range(shape[2])
+    inds = np.array(list(itertools.product(inds_i, inds_j)))
+
+    # one column per (i, j) pair, then one row per pair after transposition
+    data = data.reshape(shape[0], shape[1] * shape[2])
+
+    # first row: two placeholders (0, 0) followed by the support 'x'
+    # next rows: the (i, j) indices followed by the related intensities
+    header = np.hstack(([0, 0], x))
+    arr = np.vstack((header, np.hstack((inds, data.T))))
+
+    return arr