Skip to content

Commit

Permalink
Full multiprocessing support added
Browse files: browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Oct 25, 2024
1 parent dcec39e commit 87e5a68
Showing 1 changed file with 0 additions and 22 deletions.
22 changes: 0 additions & 22 deletions src/maldi_tools/load_maldi_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,19 @@ def extract_maldi_tsf_data(
mz_bins: np.ndarray = generate_mz_bins(min_mz, max_mz)
spectra_dict: Dict[float, float] = {}
tsf_spot_info: pd.DataFrame = tsf_cursor.analysis["Frames"]
with open("sample_log.txt", "a") as outfile:
outfile.write(f"Processing {tsf_spot_info.shape[0]} spots")
print(f"Processing {tsf_spot_info.shape[0]} spots")
for sid in tsf_spot_info["Id"].values:
# assert sid < len(tsf_spot_info)
index_arr, intensity_arr = tsf_read_line_spectrum_v2(
tdf_sdk=tdf_sdk_binary, handle=tsf_cursor.handle, frame_id=sid
)
# assert sid < len(mz_arr)
mz_arr: np.ndarray = tsf_index_to_mz(
tdf_sdk=tdf_sdk_binary, handle=tsf_cursor.handle, frame_id=sid, indices=index_arr
)

for mz, intensity in zip(mz_arr, intensity_arr):
# assert bisect_left(mz_bins, mz) < len(mz_bins)
binned_mz = mz_bins[bisect_left(mz_bins, mz)]
spectra_dict[binned_mz] = (
0 if binned_mz not in spectra_dict else spectra_dict[binned_mz]
) + intensity
if sid % 5000 == 0:
with open("sample_log.txt", "a") as outfile:
outfile.write(f"Processed {tsf_spot_info.shape[0]} spots")

run_name = os.path.basename(os.path.splitext(maldi_data_path)[0])
tsf_spectra: pd.DataFrame = pd.DataFrame(spectra_dict.items(), columns=["m/z", "intensity"])
Expand Down Expand Up @@ -170,7 +161,6 @@ def extract_maldi_run_spectra(
spectra_df: pd.DataFrame = pd.DataFrame()

with ProcessPoolExecutor(max_workers=num_workers) as executor:
print("Starting process pool")
future_maldi_data = {
executor.submit(extract_maldi_tsf_data, mp, min_mz, max_mz): mp for mp in maldi_paths
}
Expand All @@ -183,18 +173,6 @@ def extract_maldi_run_spectra(
spectra_df = pd.concat([spectra_df, spectra_mp])
except Exception as e:
print(f"Exception raised while processing {mp}")
print(e)

# for mp in maldi_paths:
# print(f"Processing data {mp}")
# start = default_timer()
# poslog_mp, spectra_mp = extract_maldi_tsf_data(
# mp, min_mz, max_mz
# )
# poslog_df = pd.concat([poslog_df, poslog_mp])
# spectra_df = pd.concat([spectra_df, spectra_mp])
# end = default_timer()
# print(f"Total time to process: {end - start}")

poslog_df = poslog_df.reset_index(drop=True)
spectra_df = spectra_df.reset_index(drop=True)
Expand Down

0 comments on commit 87e5a68

Please sign in to comment.