diff --git a/docs/sec_examples.rst b/docs/sec_examples.rst index ec01f65..4265126 100644 --- a/docs/sec_examples.rst +++ b/docs/sec_examples.rst @@ -14,3 +14,5 @@ Examples .. fancy_include:: fourier_scale.py .. fancy_include:: fft_options.py + +.. fancy_include:: fft_cupy3d_speed.py diff --git a/examples/fft_cupy3d_speed.png b/examples/fft_cupy3d_speed.png new file mode 100644 index 0000000..d18c5a9 Binary files /dev/null and b/examples/fft_cupy3d_speed.png differ diff --git a/examples/fft_cupy3d_speed.py b/examples/fft_cupy3d_speed.py new file mode 100644 index 0000000..27b5a76 --- /dev/null +++ b/examples/fft_cupy3d_speed.py @@ -0,0 +1,64 @@ +"""Fourier Transform speeds for the Cupy 3D interface + +This example visualizes the normalised speed for different 3D image stacks for +the `FFTFilterCupy3D` FFT Filter + +- Optimum stack size for 256 images (incl. padding) is bewteen 128 and 256. + +""" +import time +import matplotlib.pylab as plt +import numpy as np +import qpretrieve + +# load the experimental data +edata = np.load("./data/hologram_cell.npz") + +n_transforms_list = [8, 16, 32, 64, 128, 256, 512] +subtract_mean = True +padding = True +fft_interface = qpretrieve.fourier.FFTFilterCupy3D + +results = {} +for n_transforms in n_transforms_list: + print(f"Running {n_transforms} transforms...") + + data_2d = edata["data"].copy() + data_2d_bg = edata["bg_data"].copy() + data_3d = np.repeat( + edata["data"].copy()[np.newaxis, ...], + repeats=n_transforms, axis=0) + data_3d_bg = np.repeat( + edata["bg_data"].copy()[np.newaxis, ...], + repeats=n_transforms, axis=0) + assert data_3d.shape == data_3d_bg.shape == (n_transforms, + edata["data"].shape[0], + edata["data"].shape[1]) + + t0 = time.time() + + holo = qpretrieve.OffAxisHologram(data=data_3d, + fft_interface=fft_interface, + subtract_mean=subtract_mean, padding=padding) + holo.run_pipeline(filter_name="disk", filter_size=1 / 2) + bg = qpretrieve.OffAxisHologram(data=data_3d_bg) + bg.process_like(holo) + + t1 = time.time() + results[n_transforms] = t1 - t0 + +speed_norm = [v / k for k, v in results.items()] + +fig, axes = plt.subplots(1, 1, figsize=(8, 5)) +ax1 = axes + +ax1.bar(range(len(n_transforms_list)), height=speed_norm, color='darkmagenta') +ax1.set_xticks(range(len(n_transforms_list)), labels=n_transforms_list) +ax1.set_xlabel("Number of Transforms") +ax1.set_ylabel("Speed normalised by number of transforms (s)") +ax1.set_title(f"Normalised by number of transforms") + +plt.suptitle("Speed of CuPy 3D") +plt.tight_layout() +# plt.show() +plt.savefig("fft_cupy3d_speed.png", dpi=150) diff --git a/examples/fft_options.png b/examples/fft_options.png index 5493026..730666d 100644 Binary files a/examples/fft_options.png and b/examples/fft_options.png differ diff --git a/examples/fft_options.py b/examples/fft_options.py index 6a2ebe6..17789b5 100644 --- a/examples/fft_options.py +++ b/examples/fft_options.py @@ -4,8 +4,8 @@ user for performing Fourier transforms. - PyFFTW is initially slow, but over many FFTs is very quick. -- CuPy using CUDA can be very fast, but is currently limited because we are - transferring one image at a time to the GPU. +- CuPy 3D should not be used for direct comparison here, + as it is not doing post-processing padding. """ import time @@ -19,14 +19,18 @@ # get the available fft interfaces interfaces_available = qpretrieve.fourier.get_available_interfaces() -n_transforms = 100 +n_transforms = 200 +subtract_mean = True +padding = True +print("Running single transform...") # one transform results_1 = {} for fft_interface in interfaces_available: t0 = time.time() holo = qpretrieve.OffAxisHologram(data=edata["data"], - fft_interface=fft_interface) + fft_interface=fft_interface, + subtract_mean=subtract_mean, padding=padding) holo.run_pipeline(filter_name="disk", filter_size=1 / 2) bg = qpretrieve.OffAxisHologram(data=edata["bg_data"]) bg.process_like(holo) @@ -34,16 +38,44 @@ results_1[fft_interface.__name__] = t1 - t0 num_interfaces = len(results_1) + # multiple transforms (should see speed increase for PyFFTW) +print(f"Running {n_transforms} transforms...") results = {} for fft_interface in interfaces_available: + + data_2d = edata["data"].copy() + data_2d_bg = edata["bg_data"].copy() + + data_3d = np.repeat( + edata["data"].copy()[np.newaxis, ...], + repeats=n_transforms, axis=0) + data_3d_bg = np.repeat( + edata["bg_data"].copy()[np.newaxis, ...], + repeats=n_transforms, axis=0) + assert data_3d.shape == data_3d_bg.shape == (n_transforms, + edata["data"].shape[0], + edata["data"].shape[1]) + t0 = time.time() - for _ in range(n_transforms): - holo = qpretrieve.OffAxisHologram(data=edata["data"], - fft_interface=fft_interface) + + if fft_interface.__name__ == "FFTFilterCupy3D": + holo = qpretrieve.OffAxisHologram(data=data_3d, + fft_interface=fft_interface, + subtract_mean=subtract_mean, padding=padding) holo.run_pipeline(filter_name="disk", filter_size=1 / 2) - bg = qpretrieve.OffAxisHologram(data=edata["bg_data"]) + bg = qpretrieve.OffAxisHologram(data=data_3d_bg) bg.process_like(holo) + else: + # 2d + for _ in range(n_transforms): + holo = qpretrieve.OffAxisHologram(data=data_2d, + fft_interface=fft_interface, + subtract_mean=subtract_mean, padding=padding) + holo.run_pipeline(filter_name="disk", filter_size=1 / 2) + bg = qpretrieve.OffAxisHologram(data=edata["bg_data"]) + bg.process_like(holo) + t1 = time.time() results[fft_interface.__name__] = t1 - t0 num_interfaces = len(results) @@ -66,8 +98,10 @@ ax2.set_xticks(range(num_interfaces), labels=labels, rotation=45) ax2.set_ylabel("Speed (s)") -ax2.set_title(f"{n_transforms} Transforms") +# todo: fix code, then this title +ax2.set_title(f"{n_transforms} Transforms\n(**Cupy comparison not valid)") plt.suptitle("Speed of FFT Interfaces") plt.tight_layout() -plt.show() +# plt.show() +plt.savefig("fft_options.png", dpi=150) diff --git a/qpretrieve/fourier/base.py b/qpretrieve/fourier/base.py index 1a531a2..eb7173b 100644 --- a/qpretrieve/fourier/base.py +++ b/qpretrieve/fourier/base.py @@ -260,6 +260,9 @@ def filter(self, filter_name: str, filter_size: float, fft_used = fft_used[cslice, cslice] field = self._ifft(np.fft.ifftshift(fft_used)) + if len(self.origin.shape) != 2: + # todo: this must be corrected + self.padding = False if self.padding: # revert padding sx, sy = self.origin.shape