From 9b6cc37d501684f45193251544fa6e6dadb86da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 16 May 2024 15:07:03 +0200 Subject: [PATCH] Add Pylint (#55) * Disable PSM3 in CI pipelines * Fix filepaths and variable names * Python best coding practices Address pylint rules W0611, W0102, W1401, E1120, C0121, C0123, C0325, R0124, R1714. * Fix f-strings and incorrect indentation levels Address pylint rules W0311, W1309. * Add pylint * Rewrite serialization code with safer alternatives Address Pylint rules W0123, R1722. * fix sample script * fix bug when standarizing data from Landgesell * fix bug in peptide section of the tutorial * clean-up output from tutorials * Allow custom Python executables * Comply with output directory flag from argparse * fix on running globular_protein script * Fix exit strategy * remove not plot condition * Fix unit system * remove code repetition in standarize_data.py --------- Co-authored-by: blancoapa Co-authored-by: blancoapa Co-authored-by: Pao --- .github/workflows/testsuite.yml | 12 +- .pylintrc | 45 ++ Makefile | 42 +- lib/analysis.py | 4 +- lib/create_cg_from_pdb.py | 303 ++++----- lib/handy_functions.py | 2 +- maintainer/configure_venv.py | 2 +- maintainer/standarize_data.py | 18 +- pyMBE.py | 380 +++++------ samples/Beyer2024/README.md | 12 +- samples/Beyer2024/create_paper_data.py | 32 +- samples/Beyer2024/globular_protein.py | 18 +- samples/Beyer2024/peptide.py | 7 - .../weak_polyelectrolyte_dialysis.py | 5 - samples/branched_polyampholyte.py | 20 +- samples/peptide.py | 47 +- samples/peptide_mixture_grxmc_ideal.py | 24 +- samples/plot_HH.py | 6 +- samples/salt_solution_gcmc.py | 11 +- testsuite/bond_tests.py | 95 ++- testsuite/cph_ideal_tests.py | 16 +- testsuite/create_molecule_position_test.py | 19 +- testsuite/gcmc_tests.py | 24 +- .../generate_perpendicular_vectors_test.py | 19 +- testsuite/globular_protein_tests.py | 17 +- testsuite/grxmc_ideal_tests.py | 22 +- testsuite/henderson_hasselbalch_tests.py | 19 +- testsuite/lj_tests.py | 40 +- testsuite/parameter_test.py | 5 +- testsuite/peptide_tests.py | 10 +- testsuite/read-write-df_test.py | 36 +- testsuite/seed_test.py | 10 +- testsuite/set_particle_acidity_test.py | 22 +- .../weak_polyelectrolyte_dialysis_test.py | 13 +- tutorials/pyMBE_tutorial.ipynb | 622 ++---------------- tutorials/solution_tutorial.ipynb | 23 +- visualization/create_vmd_script.py | 104 +-- visualization/vmd-traj.py | 95 +-- 38 files changed, 784 insertions(+), 1417 deletions(-) create mode 100644 .pylintrc diff --git a/.github/workflows/testsuite.yml b/.github/workflows/testsuite.yml index d2e41b5..a8bd5ab 100644 --- a/.github/workflows/testsuite.yml +++ b/.github/workflows/testsuite.yml @@ -10,6 +10,9 @@ permissions: jobs: ubuntu: runs-on: ubuntu-latest + env: + FI_PROVIDER: "^psm3,psm3;ofi_rxd" + OMPI_MCA_mtl_ofi_provider_exclude: psm3 steps: - name: Setup EESSI uses: eessi/github-action-eessi@v3 @@ -20,16 +23,17 @@ jobs: - name: Install dependencies run: | module load ESPResSo/4.2.1-foss-2023a - python3 -m venv --system-site-packages pymbe - source pymbe/bin/activate + python3 -m venv --system-site-packages venv + source venv/bin/activate python3 maintainer/configure_venv.py python3 -m pip install -r requirements.txt - python3 -m pip install "pdoc==14.3" + python3 -m pip install "pdoc==14.3" "pylint==3.0.3" deactivate - name: Run testsuite run: | module load ESPResSo/4.2.1-foss-2023a - source pymbe/bin/activate + source venv/bin/activate + make pylint make tests make docs deactivate diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..1804a2b --- /dev/null +++ b/.pylintrc @@ -0,0 +1,45 @@ +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=all + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=dangerous-default-value, # W0102 + duplicate-key, # W0109 + eval-used, # W0123 + assert-on-tuple, # W0199 + bad-indentation, # W0311 + wildcard-import, # W0401 + unused-import, # W0611 + unused-variable, # W0612 + unused-argument, # W0613 + unused-wildcard-import, # W0614 + f-string-without-interpolation, # W1309 + anomalous-backslash-in-string, # W1401 + deprecated-method, # W1505 + comparison-with-itself, # R0124 + cyclic-import, # R0401 + trailing-comma-tuple, # R1707 + consider-using-in, # R1714 + consider-using-sys-exit, # R1722 + singleton-comparison, # C0121 + unidiomatic-typecheck, # C0123 + bad-classmethod-argument, # C0202 + superfluous-parens, # C0325 + undefined-variable, # E0602 + no-value-for-parameter, # E1120 diff --git a/Makefile b/Makefile index bfb501c..2ed0794 100644 --- a/Makefile +++ b/Makefile @@ -3,32 +3,38 @@ .PHONY: visual .PHONY: clean +PYTHON = python3 + docs: mkdir -p ./documentation - pdoc ./pyMBE.py -o ./documentation --docformat google + PDOC_ALLOW_EXEC=0 ${PYTHON} -m pdoc ./pyMBE.py -o ./documentation --docformat google tests: - python3 testsuite/lj_tests.py - python3 testsuite/set_particle_acidity_test.py - python3 testsuite/bond_tests.py - python3 testsuite/generate_perpendicular_vectors_test.py - python3 testsuite/create_molecule_position_test.py - python3 testsuite/seed_test.py - python3 testsuite/read-write-df_test.py - python3 testsuite/parameter_test.py - python3 testsuite/henderson_hasselbalch_tests.py - python3 testsuite/cph_ideal_tests.py - python3 testsuite/grxmc_ideal_tests.py - python3 testsuite/peptide_tests.py - python3 testsuite/gcmc_tests.py - python3 testsuite/weak_polyelectrolyte_dialysis_test.py - python3 testsuite/globular_protein_tests.py + ${PYTHON} testsuite/lj_tests.py + ${PYTHON} testsuite/set_particle_acidity_test.py + ${PYTHON} testsuite/bond_tests.py + ${PYTHON} testsuite/generate_perpendicular_vectors_test.py + ${PYTHON} testsuite/create_molecule_position_test.py + ${PYTHON} testsuite/seed_test.py + ${PYTHON} testsuite/read-write-df_test.py + ${PYTHON} testsuite/parameter_test.py + ${PYTHON} testsuite/henderson_hasselbalch_tests.py + ${PYTHON} testsuite/cph_ideal_tests.py + ${PYTHON} testsuite/grxmc_ideal_tests.py + ${PYTHON} testsuite/peptide_tests.py + ${PYTHON} testsuite/gcmc_tests.py + ${PYTHON} testsuite/weak_polyelectrolyte_dialysis_test.py + ${PYTHON} testsuite/globular_protein_tests.py + sample: - python3 sample_scripts/peptide_simulation_example.py + ${PYTHON} samples/peptide.py visual: - python3 handy_scripts/vmd-traj.py + ${PYTHON} visualization/vmd-traj.py vmd -e visualization.tcl tutorial: jupyter-lab tutorials/pyMBE_tutorial.ipynb + +pylint: + ${PYTHON} -m pylint pyMBE.py lib/ testsuite/ samples/ maintainer/ visualization/ diff --git a/lib/analysis.py b/lib/analysis.py index 1b69a25..f82745c 100644 --- a/lib/analysis.py +++ b/lib/analysis.py @@ -307,9 +307,9 @@ def get_dt(data): warn_lines = [] for i in range(1,imax): dt = time[i] - time[i-1] - if(np.abs((dt_init - dt)/dt) > 0.01 ): + if np.abs((dt_init - dt)/dt) > 0.01: warn_lines.append("Row {} dt = {} = {} - {} not equal to dt_init = {}") - if(len(warn_lines) > 20): + if len(warn_lines) > 20: print("\n") for line in warn_lines: print(line) diff --git a/lib/create_cg_from_pdb.py b/lib/create_cg_from_pdb.py index f21f35d..18ef4d4 100644 --- a/lib/create_cg_from_pdb.py +++ b/lib/create_cg_from_pdb.py @@ -1,9 +1,10 @@ -import numpy as np -import pandas as pd +import sys import argparse +import numpy as np +import pandas as pd from biopandas.pdb import PandasPdb from genericpath import isfile -pd.options.mode.chained_assignment = None +pd.options.mode.chained_assignment = None def verbose_print(**kwargs): """ @@ -23,27 +24,27 @@ def create_biopandas_df(): Returns: biopandas_df (cls): BioPandas dataframe with the input pdb data stored """ - + if args.filename: - if isfile (args.filename): + if isfile (args.filename): verbose_print(message=f'Loading {args.filename}', verbose=args.verbose) - biopandas_df = PandasPdb() - biopandas_df.read_pdb(args.filename) + biopandas_df = PandasPdb() + biopandas_df.read_pdb(args.filename) verbose_print(message=f'Creating BioPandas data frame of: {args.filename} ', verbose=args.verbose) else: raise ValueError (f'{args.pdb_code} does not exist in this folder, STOP \n \ To download the pdb code from RCSB PDB add --download as argument in the command line ') elif args.pdb_code: verbose_print (message=f'Downloading {args.pdb_code} from RCSB PDB', verbose=args.verbose) - biopandas_df = PandasPdb().fetch_pdb(args.pdb_code) + biopandas_df = PandasPdb().fetch_pdb(args.pdb_code) verbose_print (message=f'Creating BioPandas data frame of: {args.pdb_code} ', verbose=args.verbose) - return biopandas_df + return biopandas_df -def create_pandas_df_of_the_protein_and_metal_ions (biopandas_df): +def create_pandas_df_of_the_protein_and_metal_ions (biopandas_df): """ - Creates a data frame that contains only the information of aminoacids and metal ions (if exist) in the protein + Creates a data frame that contains only the information of aminoacids and metal ions (if exist) in the protein (this is necessary because biopandas does not include all the functionalities of pandas) Args: biopandas_df (cls): biopandas data frame with all the pdb information @@ -51,13 +52,13 @@ def create_pandas_df_of_the_protein_and_metal_ions (biopandas_df): pdb_df (cls): pandas df with the protein information. """ - pdb_df_protein = pd.DataFrame(biopandas_df.df['ATOM']) + pdb_df_protein = pd.DataFrame(biopandas_df.df['ATOM']) - pdb_df_metal_ions = pd.DataFrame(biopandas_df.df['HETATM']) + pdb_df_metal_ions = pd.DataFrame(biopandas_df.df['HETATM']) pdb_df_metal_ions = pdb_df_metal_ions.loc[pdb_df_metal_ions['residue_name'] != 'HOH'] if pdb_df_metal_ions.empty: - verbose_print (message=f'It does not contain metal ions', verbose=args.verbose) + verbose_print (message='It does not contain metal ions', verbose=args.verbose) pdb_df_metal_ions ['residue_name'] = pdb_df_metal_ions['residue_name'].replace (['CA'],'Ca') pdb_df_metal_ions ['atom_name'] = pdb_df_metal_ions['atom_name'].replace (['CA'],'Ca') @@ -75,9 +76,9 @@ def drop_unnecesary_columns (pdb_df): drop_columns = ['record_name','blank_1','blank_2','blank_3','blank_4','alt_loc','insertion','segment_id','charge','line_idx','occupancy','b_factor'] pdb_df.drop(drop_columns,axis=1,inplace=True) - return pdb_df + return pdb_df -def change_aminoacids_name_to_one_letter_name (pdb_df): +def change_aminoacids_name_to_one_letter_name (pdb_df): """ Changes the three letter aminoacid name to the one letter name @@ -88,11 +89,11 @@ def change_aminoacids_name_to_one_letter_name (pdb_df): "ASH": "A", "ALA": "A", "CYX": "C", "CYS": "C", "ASP": "D", "GLU": "E", "PHE": "F", "GLY": "G", "HIS": "H", "HID": "H", "HIE": "H", "HIP": "H", "ILE": "I", "LYS": "K", "LEU": "L", "MET": "M", "MSE": "M", "ASN": "N", "PYL": "O", "HYP": "P", "PRO": "P", - "GLN": "Q", "ARG": "R", "SER": "S", "THR": "T", "SEL": "U", "VAL": "V", "TRP": "W", + "GLN": "Q", "ARG": "R", "SER": "S", "THR": "T", "SEL": "U", "VAL": "V", "TRP": "W", "TYR": "Y", } pdb_df ['resname_one_letter'] = pdb_df ['residue_name'] - + pdb_df = pdb_df.replace({'resname_one_letter': amino3to1},inplace=True) return pdb_df @@ -106,7 +107,7 @@ def create_coarse_grain_df (pdb_df): Return: coarse_grain_df (df): df with information of the coarse grain model. """ - coarse_grain_per_chain = {} + coarse_grain_per_chain = {} coarse_grain_df = pd.DataFrame() if args.chain_id is not None: @@ -115,16 +116,16 @@ def create_coarse_grain_df (pdb_df): alpha_carbon_and_terminus = create_alpha_carbons_and_terminus_beads (pdb_df) sidechain = create_sidechain_beads (pdb_df) coarse_grain_df= merge_alpha_carbon_and_sidechain (alpha_carbon_and_terminus,sidechain) - + else: - verbose_print (message=f'Coarse grain model for all the sub-units in the PDB', verbose=args.verbose) + verbose_print (message='Coarse grain model for all the sub-units in the PDB', verbose=args.verbose) for chain in pdb_df.groupby(['chain_id']).groups.keys(): pdb_df_per_chain = pdb_df.loc[pdb_df['chain_id']== chain] alpha_carbons_and_terminus = create_alpha_carbons_and_terminus_beads (pdb_df_per_chain) sidechain = create_sidechain_beads (pdb_df_per_chain) coarse_grain_per_chain [chain] = merge_alpha_carbon_and_sidechain (alpha_carbons_and_terminus,sidechain) coarse_grain_df = pd.concat ([coarse_grain_df,coarse_grain_per_chain[chain]],ignore_index=0,axis=0) - + coarse_grain_df.index = np.arange(1,len(coarse_grain_df)+1) return coarse_grain_df @@ -138,7 +139,7 @@ def create_alpha_carbons_and_terminus_beads (pdb_df): pdb_df (cls): pandas df with the protein information. Return: alpha_carbons_and_terminus (dict): dictionary that contains: atom_numbers, position, resname,resname_one_letter,radius,chain - + """ residue_number_list = pdb_df.residue_number.to_list() @@ -153,7 +154,7 @@ def create_alpha_carbons_and_terminus_beads (pdb_df): y_coord = pd.Series (pdb_df.y_coord[atom]) z_coord = pd.Series (pdb_df.z_coord[atom]) - resname = 'n' + resname = 'n' resname = pd.Series (resname) resname_one_letter = pd.Series (resname) @@ -164,13 +165,13 @@ def create_alpha_carbons_and_terminus_beads (pdb_df): chain = pd.Series(pdb_df.chain_id[atom]) elif pdb_df.atom_name[atom] == 'CA': - + atom_number_pdb_ca= pd.Series(pdb_df.residue_number[atom]) x_coord_ca = pd.Series(pdb_df.x_coord[atom]) y_coord_ca = pd.Series(pdb_df.y_coord[atom]) z_coord_ca = pd.Series(pdb_df.z_coord[atom]) resname_ca = pd.Series(pdb_df.residue_name[atom]) - + resname_one_letter_ca = pd.Series ('CA') resid_ca = pd.Series(pdb_df.residue_number[atom]) @@ -216,33 +217,33 @@ def create_alpha_carbons_and_terminus_beads (pdb_df): chain = pd.concat ([chain,chain_c],axis =0, ignore_index = 0) resid = pd.concat ([resid,resid_c],axis =0, ignore_index = 0) - elif pdb_df.residue_name[atom] == 'ca' : + elif pdb_df.residue_name[atom] == 'ca': + + atom_number_pdb_metal= pd.Series(pdb_df.residue_number[atom]) + x_coord_metal = pd.Series(pdb_df.x_coord[atom]) + y_coord_metal = pd.Series(pdb_df.y_coord[atom]) + z_coord_metal = pd.Series(pdb_df.z_coord[atom]) - atom_number_pdb_metal= pd.Series(pdb_df.residue_number[atom]) - x_coord_metal = pd.Series(pdb_df.x_coord[atom]) - y_coord_metal = pd.Series(pdb_df.y_coord[atom]) - z_coord_metal = pd.Series(pdb_df.z_coord[atom]) - - resname_metal = 'ca' + resname_metal = 'ca' - resname_metal = pd.Series(resname_metal) - resname_one_letter_metal = pd.Series (resname_metal) - resid_metal = pd.Series(pdb_df.residue_number[atom]) + resname_metal = pd.Series(resname_metal) + resname_one_letter_metal = pd.Series (resname_metal) + resid_metal = pd.Series(pdb_df.residue_number[atom]) - atom_radius_metal = 2.3 - radius_metal = pd.Series (atom_radius_metal) - chain_metal = pd.Series(pdb_df.chain_id[atom]) + atom_radius_metal = 2.3 + radius_metal = pd.Series (atom_radius_metal) + chain_metal = pd.Series(pdb_df.chain_id[atom]) - atom_numbers = pd.concat([atom_numbers,atom_number_pdb_metal], axis=0, ignore_index = True) - x_coord = pd.concat ([x_coord,x_coord_metal],axis=0, ignore_index = True) - y_coord = pd.concat ([y_coord,y_coord_metal],axis=0, ignore_index = True) - z_coord = pd.concat ([z_coord,z_coord_metal],axis=0, ignore_index = True) - resname = pd.concat ([resname,resname_metal], axis=0, ignore_index = True) - resname_one_letter = pd.concat ([resname_one_letter,resname_one_letter_metal], axis=0, ignore_index=True) + atom_numbers = pd.concat([atom_numbers,atom_number_pdb_metal], axis=0, ignore_index = True) + x_coord = pd.concat ([x_coord,x_coord_metal],axis=0, ignore_index = True) + y_coord = pd.concat ([y_coord,y_coord_metal],axis=0, ignore_index = True) + z_coord = pd.concat ([z_coord,z_coord_metal],axis=0, ignore_index = True) + resname = pd.concat ([resname,resname_metal], axis=0, ignore_index = True) + resname_one_letter = pd.concat ([resname_one_letter,resname_one_letter_metal], axis=0, ignore_index=True) - radius = pd.concat ([radius,radius_metal], axis = 0, ignore_index = True ) - chain = pd.concat ([chain,chain_metal],axis =0, ignore_index = 0) - resid = pd.concat ([resid,resid_metal],axis =0, ignore_index = 0) + radius = pd.concat ([radius,radius_metal], axis = 0, ignore_index = True ) + chain = pd.concat ([chain,chain_metal],axis =0, ignore_index = 0) + resid = pd.concat ([resid,resid_metal],axis =0, ignore_index = 0) alpha_carbons_and_terminus = {'atom_numbers': atom_numbers,'x_coord':x_coord,'y_coord':y_coord,'z_coord':z_coord,\ 'resname':resname,'resname_one_letter':resname_one_letter ,'radius': radius, 'chain': chain, 'resid': resid} @@ -250,80 +251,80 @@ def create_alpha_carbons_and_terminus_beads (pdb_df): return alpha_carbons_and_terminus def create_sidechain_beads (pdb_df) : - """ - Creates the sidechain beads position, which is created considering the original position of all the atoms in the sidechain - and then calculating the center of mass. - - Args: - pdb_df (cls): pandas df with the protein information. - Return: - residues_bead (dict): dictionary with the information of atom_numbers, coordinates , resname, chain and radius. - - """ - - pdb_df.loc[pdb_df['element_symbol'] == 'C', 'mass'] = 12.0 - pdb_df.loc[pdb_df['element_symbol'] == 'N', 'mass'] = 14.0 - pdb_df.loc[pdb_df['element_symbol'] == 'O', 'mass'] = 16.0 - pdb_df.loc[pdb_df['element_symbol'] == 'S', 'mass'] = 32.0 - pdb_df.loc[pdb_df['element_symbol'] == 'CA', 'mass'] = 40.0 - - x_cm = pdb_df.x_coord*pdb_df.mass - y_cm = pdb_df.y_coord*pdb_df.mass - z_cm = pdb_df.z_coord*pdb_df.mass - - pdb_df ['x_cm'] = x_cm - pdb_df ['y_cm'] = y_cm - pdb_df ['z_cm'] = z_cm - - pdb_df = pdb_df.loc[(pdb_df.atom_name != 'O') & (pdb_df.atom_name != 'C') & \ - (pdb_df.atom_name != 'N') & (pdb_df.atom_name != 'CA') & (pdb_df.residue_name != 'GLY') & (pdb_df.atom_name != 'ca') ] - - pdb_df['sum_x_cm'] = pdb_df.groupby(['residue_number'])['x_cm'].transform(sum) - pdb_df['sum_y_cm'] = pdb_df.groupby(['residue_number'])['y_cm'].transform(sum) - pdb_df['sum_z_cm'] = pdb_df.groupby(['residue_number'])['z_cm'].transform(sum) - pdb_df['sum_mass'] = pdb_df.groupby(['residue_number'])['mass'].transform(sum) - pdb_df['sum_res'] = pdb_df.groupby('residue_number')['residue_number'].transform('count') - - pdb_df['x_pos'] = pdb_df ['sum_x_cm']/ pdb_df['sum_mass'] - pdb_df['y_pos'] = pdb_df ['sum_y_cm']/ pdb_df['sum_mass'] - pdb_df['z_pos'] = pdb_df ['sum_z_cm']/ pdb_df['sum_mass'] - - ''' sidechain radius of gyration ''' - - pdb_df['delta_x'] = (pdb_df['x_coord'] - pdb_df['x_pos'])**2 - pdb_df['delta_y'] = (pdb_df['y_coord'] - pdb_df['y_pos'])**2 - pdb_df['delta_z'] = (pdb_df['z_coord'] - pdb_df['z_pos'])**2 - - cols_to_sum = ['delta_x','delta_y','delta_z'] - - pdb_df['sum_rg'] = pdb_df[cols_to_sum].sum(axis=1) - pdb_df['sum_rg'] = pdb_df.groupby(['residue_number'])['sum_rg'].transform(sum) - pdb_df['radius_r'] = np.sqrt(pdb_df['sum_rg']/pdb_df['sum_res'],dtype='float').round(4) - - pdb_df = pdb_df.drop_duplicates(subset='residue_number') - - check_sidechain_radius (pdb_df) - verbose_print (message='It will be placed a bead with an estimated radius', verbose=args.verbose) - - x_coord_r = pdb_df['x_pos'] - y_coord_r = pdb_df['y_pos'] - z_coord_r = pdb_df['z_pos'] - - pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform (np.mean).round(4) - - atom_number_pdb_r = pd.Series(pdb_df.residue_number) - resname_r = pd.Series(pdb_df.residue_name) - resname_one_letter_r= pd.Series (pdb_df.resname_one_letter) - resid_r = pd.Series(pdb_df.residue_number) - - radius_r = pd.Series(pdb_df['radius_mean']) - chain_r = pd.Series(pdb_df.chain_id) - - residues_bead = {'atom_numbers_r': atom_number_pdb_r,\ - 'x_coord_r':x_coord_r,'y_coord_r':y_coord_r,'z_coord_r':z_coord_r, \ - 'resname_r':resname_r,'resname_one_letter_r':resname_one_letter_r , 'radius_r':radius_r, 'chain_r': chain_r, 'resid_r': resid_r} - - return residues_bead + """ + Creates the sidechain beads position, which is created considering the original position of all the atoms in the sidechain + and then calculating the center of mass. + + Args: + pdb_df (cls): pandas df with the protein information. + Return: + residues_bead (dict): dictionary with the information of atom_numbers, coordinates , resname, chain and radius. + + """ + + pdb_df.loc[pdb_df['element_symbol'] == 'C', 'mass'] = 12.0 + pdb_df.loc[pdb_df['element_symbol'] == 'N', 'mass'] = 14.0 + pdb_df.loc[pdb_df['element_symbol'] == 'O', 'mass'] = 16.0 + pdb_df.loc[pdb_df['element_symbol'] == 'S', 'mass'] = 32.0 + pdb_df.loc[pdb_df['element_symbol'] == 'CA', 'mass'] = 40.0 + + x_cm = pdb_df.x_coord*pdb_df.mass + y_cm = pdb_df.y_coord*pdb_df.mass + z_cm = pdb_df.z_coord*pdb_df.mass + + pdb_df ['x_cm'] = x_cm + pdb_df ['y_cm'] = y_cm + pdb_df ['z_cm'] = z_cm + + pdb_df = pdb_df.loc[(pdb_df.atom_name != 'O') & (pdb_df.atom_name != 'C') & \ + (pdb_df.atom_name != 'N') & (pdb_df.atom_name != 'CA') & (pdb_df.residue_name != 'GLY') & (pdb_df.atom_name != 'ca') ] + + pdb_df['sum_x_cm'] = pdb_df.groupby(['residue_number'])['x_cm'].transform(sum) + pdb_df['sum_y_cm'] = pdb_df.groupby(['residue_number'])['y_cm'].transform(sum) + pdb_df['sum_z_cm'] = pdb_df.groupby(['residue_number'])['z_cm'].transform(sum) + pdb_df['sum_mass'] = pdb_df.groupby(['residue_number'])['mass'].transform(sum) + pdb_df['sum_res'] = pdb_df.groupby('residue_number')['residue_number'].transform('count') + + pdb_df['x_pos'] = pdb_df ['sum_x_cm']/ pdb_df['sum_mass'] + pdb_df['y_pos'] = pdb_df ['sum_y_cm']/ pdb_df['sum_mass'] + pdb_df['z_pos'] = pdb_df ['sum_z_cm']/ pdb_df['sum_mass'] + + ''' sidechain radius of gyration ''' + + pdb_df['delta_x'] = (pdb_df['x_coord'] - pdb_df['x_pos'])**2 + pdb_df['delta_y'] = (pdb_df['y_coord'] - pdb_df['y_pos'])**2 + pdb_df['delta_z'] = (pdb_df['z_coord'] - pdb_df['z_pos'])**2 + + cols_to_sum = ['delta_x','delta_y','delta_z'] + + pdb_df['sum_rg'] = pdb_df[cols_to_sum].sum(axis=1) + pdb_df['sum_rg'] = pdb_df.groupby(['residue_number'])['sum_rg'].transform(sum) + pdb_df['radius_r'] = np.sqrt(pdb_df['sum_rg']/pdb_df['sum_res'],dtype='float').round(4) + + pdb_df = pdb_df.drop_duplicates(subset='residue_number') + + check_sidechain_radius (pdb_df) + verbose_print (message='It will be placed a bead with an estimated radius', verbose=args.verbose) + + x_coord_r = pdb_df['x_pos'] + y_coord_r = pdb_df['y_pos'] + z_coord_r = pdb_df['z_pos'] + + pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform (np.mean).round(4) + + atom_number_pdb_r = pd.Series(pdb_df.residue_number) + resname_r = pd.Series(pdb_df.residue_name) + resname_one_letter_r= pd.Series (pdb_df.resname_one_letter) + resid_r = pd.Series(pdb_df.residue_number) + + radius_r = pd.Series(pdb_df['radius_mean']) + chain_r = pd.Series(pdb_df.chain_id) + + residues_bead = {'atom_numbers_r': atom_number_pdb_r,\ + 'x_coord_r':x_coord_r,'y_coord_r':y_coord_r,'z_coord_r':z_coord_r, \ + 'resname_r':resname_r,'resname_one_letter_r':resname_one_letter_r , 'radius_r':radius_r, 'chain_r': chain_r, 'resid_r': resid_r} + + return residues_bead def check_sidechain_radius (pdb_df): """ @@ -331,14 +332,14 @@ def check_sidechain_radius (pdb_df): Args: pdb_df (cls): pandas df with the protein information. """ - + for bead in pdb_df.residue_name.keys(): if pdb_df.sum_res[bead] == 1: if pdb_df.residue_name[bead] == 'ALA': - pdb_df.radius_r [bead] = 1.0 + pdb_df.radius_r [bead] = 1.0 else: - verbose_print (message=f'{pdb_df.residue_name[bead]} from chain {pdb_df.chain_id[bead]} has missing atoms.', verbose=args.verbose) - pdb_df.radius_r [bead] = 1.0 + verbose_print (message=f'{pdb_df.residue_name[bead]} from chain {pdb_df.chain_id[bead]} has missing atoms.', verbose=args.verbose) + pdb_df.radius_r [bead] = 1.0 return 0 def merge_alpha_carbon_and_sidechain (alpha_carbons_and_terminals,residues_bead): @@ -351,7 +352,7 @@ def merge_alpha_carbon_and_sidechain (alpha_carbons_and_terminals,residues_bead) residues_bead (dict): dictionary with the information of atom_numbers, coordinates , resname, chain and radius. Return: - coarse_grain_df (cls): pandas df with the information of the coarse grain beads. + coarse_grain_df (cls): pandas df with the information of the coarse grain beads. """ if args.model == "2beadAA": @@ -388,8 +389,8 @@ def merge_alpha_carbon_and_sidechain (alpha_carbons_and_terminals,residues_bead) coarse_grain_df = pd.DataFrame() coarse_grain_df = pd.concat ( [atom_numbers,resname,resname_one_letter, resid, x_coord,y_coord,z_coord,radius,chain], axis = 1, ignore_index = True ) - - coarse_grain_df.columns = ['atom_numbers','resname','resname_one_letter','resid','x_coord','y_coord','z_coord','radius','chain'] + + coarse_grain_df.columns = ['atom_numbers','resname','resname_one_letter','resid','x_coord','y_coord','z_coord','radius','chain'] coarse_grain_df.sort_values (by = 'atom_numbers', inplace = True ) coarse_grain_df.index = np.arange( 1 , len(coarse_grain_df) + 1) @@ -400,9 +401,9 @@ def create_list_of_particles_bond (coarse_grain_df): """ Creates a list of all the bonds in the coarse grain model of the protein Args: - coarse_grain_df (cls): pandas df with the information of the coarse grain beads. + coarse_grain_df (cls): pandas df with the information of the coarse grain beads. Return: - particles_bond_list (list): list with all the particles bond, with the format 'XX:YY' + particles_bond_list (list): list with all the particles bond, with the format 'XX:YY' """ atom_numbers_list = [] bond_dict = {} @@ -412,7 +413,7 @@ def create_list_of_particles_bond (coarse_grain_df): coarse_grain_chain = coarse_grain_df.loc[coarse_grain_df['chain']==key] particle_id = coarse_grain_chain.loc[(coarse_grain_chain.resname_one_letter == 'CA')] - particle_id ['indice'] = particle_id.index + particle_id ['indice'] = particle_id.index alpha_carbons_id_list = particle_id['indice'].to_list() ''' adds to a list all the alpha carbon bonds ''' @@ -426,14 +427,14 @@ def create_list_of_particles_bond (coarse_grain_df): minimun = min (coarse_grain_chain.atom_numbers.keys()) maximum = max (coarse_grain_chain.atom_numbers.keys()) - for i in range (minimun, maximum+1): - if coarse_grain_chain.atom_numbers[i] not in atom_numbers_list: + for i in range (minimun, maximum+1): + if coarse_grain_chain.atom_numbers[i] not in atom_numbers_list: atom_numbers_list.append (coarse_grain_chain.atom_numbers[i] ) - for residues in atom_numbers_list: + for residues in atom_numbers_list: bond_dict [residues] = list () - for i in range (minimun,maximum+1): + for i in range (minimun,maximum+1): if i < (maximum): if coarse_grain_chain.atom_numbers[i] == coarse_grain_chain.atom_numbers[i+1]: @@ -453,10 +454,10 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden """ Creates the output as a VTF file that contains the protein coarse grained model information and the bonds - in the coarse grain model + in the coarse grain model Args: coarse_grain (cls): pandas df with the coarse graine information of the protein - beads_bond (list): list with all the bond between particles + beads_bond (list): list with all the bond between particles identifier (str): protein identifier """ @@ -474,19 +475,19 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden for i in (beads_bond): output_vtf_file.write(f'bond {i}\n') - output_vtf_file.write (f'\ntimestep indexed \n') + output_vtf_file.write ('\ntimestep indexed \n') cols_to_drop = ['resname','resname_one_letter','radius','chain','resid'] coarse_grain_df = coarse_grain.drop (cols_to_drop, axis = 1 ) output_vtf_file.write(coarse_grain_df.to_string (header = False)) - + output_vtf_file.close() return 0 if __name__ == '__main__': - - """Creates a coarse-grained model from a pdb structure of a protein. - The input can either be a pdb file or a pdb code. + + """Creates a coarse-grained model from a pdb structure of a protein. + The input can either be a pdb file or a pdb code. If a pdb code is given, the structure of the corresponding protein is downloaded from RCSB. By default, it constructs a 2-bead model of the protein with a bead in the alpha carbon and another in the side chain of each aminoacid @@ -507,11 +508,11 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden """ # Define argparse arguments to the script and parse them - parser = argparse.ArgumentParser(description='Creates a coarse-grained model from a protein structure given in PDB format') + parser = argparse.ArgumentParser(description='Creates a coarse-grained model from a protein structure given in PDB format') parser.add_argument('--filename', dest='filename', help='\nPath to the PDB file\n') - parser.add_argument('--download_pdb', dest='pdb_code', help='Downloads the corresponding PDB from RCSB and coarse-grains it') + parser.add_argument('--download_pdb', dest='pdb_code', help='Downloads the corresponding PDB from RCSB and coarse-grains it') parser.add_argument('--model', dest='model', default='2beadAA', type=str , help='\nCoarse-grained model to be used\n') - parser.add_argument('--chain_id', type=str , help='\nSpecific chaid_id to coarse-grain\n') + parser.add_argument('--chain_id', type=str , help='\nSpecific chaid_id to coarse-grain\n') parser.add_argument('--verbose', dest='verbose', action='store_true') parser.add_argument('--no-verbose', dest='verbose', action='store_false') parser.set_defaults(verbose=True) @@ -523,14 +524,14 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden if args.filename is None and args.pdb_code is None: verbose_print(message="WARNING: no inputs were provided, nothing was done", verbose=args.verbose) - exit() + sys.exit(1) elif args.filename is not None and args.pdb_code is not None: verbose_print(message="ERROR: --filename and --download_pdb argparse modes cannot be active at the same time, please choose one functionality", verbose=args.verbose) - exit() + sys.exit(1) if args.model not in valid_keys_models: verbose_print(message="ERROR: --model only takes " + str(valid_keys_models), verbose=args.verbose) - exit() - + sys.exit(1) + biopandas_df = create_biopandas_df() pdb_df = create_pandas_df_of_the_protein_and_metal_ions(biopandas_df) drop_unnecesary_columns(pdb_df) @@ -542,4 +543,4 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden elif args.pdb_code: identifier=args.pdb_code create_output_coarse_grain_model_as_vtf_file(coarse_grain_df,beads_bond,identifier) - verbose_print(message=f'Finished coarse grain model', verbose=args.verbose) + verbose_print(message='Finished coarse grain model', verbose=args.verbose) diff --git a/lib/handy_functions.py b/lib/handy_functions.py index 7f5f178..a6c4e2e 100644 --- a/lib/handy_functions.py +++ b/lib/handy_functions.py @@ -154,7 +154,7 @@ def setup_langevin_dynamics(espresso_system, kT, SEED,time_step=1e-2, gamma=1, t def create_random_seed(): """ - Creates the seed for the random number generator from the system hour + Generates a seed for the random number generator using the system time in seconds. """ import time SEED=int(time.time()) diff --git a/maintainer/configure_venv.py b/maintainer/configure_venv.py index fbd3c74..5719621 100644 --- a/maintainer/configure_venv.py +++ b/maintainer/configure_venv.py @@ -2,7 +2,7 @@ import argparse import sysconfig try: - import espressomd + import espressomd # pylint: disable=unused-import espressomd_found = True except ModuleNotFoundError: espressomd_found = False diff --git a/maintainer/standarize_data.py b/maintainer/standarize_data.py index 4266fe0..425a779 100644 --- a/maintainer/standarize_data.py +++ b/maintainer/standarize_data.py @@ -46,7 +46,6 @@ Ref_torres = ["1f6s-10mM-torres.dat","1beb-10mM-torres.dat" ] Ref_landsgesell=["data_landsgesell.csv"] - if filename in Refs_lunkad: data=pd.read_csv(ref_path) Z_ref = 5*-1*data['aaa']+5*data['aab'] @@ -59,7 +58,6 @@ data=np.loadtxt(ref_path, delimiter=",") Z_ref=data[:,1] Z_ref_err=data[:,2] - pH_range = np.linspace(2, 12, num=21) elif filename in Ref_torres: @@ -81,21 +79,13 @@ else: raise RuntimeError() - -# Store the data -data=pd.DataFrame({"pH": pH_range, - "charge": Z_ref, - "charge_error": Z_ref_err}) - -if filename in Refs_lunkad+Ref_blanco: - pH_range = np.linspace(2, 12, num=21) - - # Store the data +if filename in Refs_lunkad+Ref_blanco+Ref_torres: + # Create the pandas DataFrame data=pd.DataFrame({"pH": pH_range, "charge": Z_ref, "charge_error": Z_ref_err}) - -data_path=pmb.get_resource(f"testsuite/data") +# Store the data +data_path=pmb.get_resource("testsuite/data") data.to_csv(f"{data_path}/{output_filenames[filename]}", index=False) diff --git a/pyMBE.py b/pyMBE.py index fa9c0fb..eb1e5ba 100644 --- a/pyMBE.py +++ b/pyMBE.py @@ -1,3 +1,13 @@ +import re +import sys +import ast +import json +import pint +import numpy as np +import pandas as pd +import scipy.optimize + + class pymbe_library(): """ @@ -11,12 +21,6 @@ class pymbe_library(): kT(`obj`): Thermal energy using the `pmb.units` UnitRegistry. It is used as the unit of reduced energy. Kw(`obj`): Ionic product of water using the `pmb.units` UnitRegistry. Used in the setup of the G-RxMC method. """ - import pint - import re - import numpy as np - import pandas as pd - import json - from scipy import optimize units = pint.UnitRegistry() N_A=6.02214076e23 / units.mol Kb=1.38064852e-23 * units.J / units.K @@ -27,6 +31,22 @@ class pymbe_library(): SEED=None rng=None + + class NumpyEncoder(json.JSONEncoder): + + """ + Custom JSON encoder that converts NumPy arrays to Python lists + and NumPy scalars to Python scalars. + """ + + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, np.generic): + return obj.item() + return super().default(obj) + + def __init__(self, SEED, temperature=None, unit_length=None, unit_charge=None, Kw=None): """ Initializes the pymbe_library by setting up the reduced unit system with `temperature` and `reduced_length` @@ -46,21 +66,9 @@ def __init__(self, SEED, temperature=None, unit_length=None, unit_charge=None, K """ # Seed and RNG self.SEED=SEED - self.rng = self.np.random.default_rng(SEED) - # Default definitions of reduced units - if temperature is None: - temperature= 298.15 * self.units.K - if unit_length is None: - unit_length= 0.355 * self.units.nm - if unit_charge is None: - unit_charge=self.units.e - if Kw is None: - Kw = 1e-14 - self.kT=temperature*self.Kb - self.Kw=Kw*self.units.mol**2 / (self.units.l**2) - self.units.define(f'reduced_energy = {self.kT}') - self.units.define(f'reduced_length = {unit_length}') - self.units.define(f'reduced_charge = 1*e') + self.rng = np.random.default_rng(SEED) + self.set_reduced_units(unit_length=unit_length, unit_charge=unit_charge, + temperature=temperature, Kw=Kw, verbose=False) self.setup_df() return @@ -108,7 +116,7 @@ def add_bond_in_df(self, particle_id1, particle_id2, use_default_bond=False): if not bond_key: return self.copy_df_entry(name=bond_key,column_name='particle_id2',number_of_copies=1) - indexs = self.np.where(self.df['name']==bond_key) + indexs = np.where(self.df['name']==bond_key) index_list = list (indexs[0]) used_bond_df = self.df.loc[self.df['particle_id2'].notnull()] #without this drop the program crashes when dropping duplicates because the 'bond' column is a dict @@ -141,7 +149,7 @@ def add_bonds_to_espresso(self, espresso_system) : return - def add_value_to_df(self,index,key,new_value, verbose=True): + def add_value_to_df(self,index,key,new_value, verbose=True, non_standard_value=False): """ Adds a value to a cell in the `pmb.df` DataFrame. @@ -149,20 +157,36 @@ def add_value_to_df(self,index,key,new_value, verbose=True): index(`int`): index of the row to add the value to. key(`str`): the column label to add the value to. verbose(`bool`, optional): Switch to activate/deactivate verbose. Defaults to True. + non_standard_value(`bool`, optional): Switch to enable insertion of non-standard values, such as `dict` objects. Defaults to False. """ + + token = "#protected:" + + def protect(obj): + if non_standard_value: + return token + json.dumps(obj, cls=self.NumpyEncoder) + return obj + + def deprotect(obj): + if non_standard_value and isinstance(obj, str) and obj.startswith(token): + return json.loads(obj.removeprefix(token)) + return obj + # Make sure index is a scalar integer value index = int (index) assert isinstance(index, int), '`index` should be a scalar integer value.' - idx = self.pd.IndexSlice + idx = pd.IndexSlice if self.check_if_df_cell_has_a_value(index=index,key=key): old_value= self.df.loc[index,idx[key]] if verbose: - if old_value != new_value: + if protect(old_value) != protect(new_value): name=self.df.loc[index,('name','')] pmb_type=self.df.loc[index,('pmb_type','')] print(f"WARNING: you are redefining the properties of {name} of pmb_type {pmb_type}") print(f'WARNING: overwritting the value of the entry `{key}`: old_value = {old_value} new_value = {new_value}') - self.df.loc[index,idx[key]] = new_value + self.df.loc[index,idx[key]] = protect(new_value) + if non_standard_value: + self.df[key] = self.df[key].apply(deprotect) return def assign_molecule_id(self, name, molecule_index, pmb_type, used_molecules_id): @@ -186,8 +210,8 @@ def assign_molecule_id(self, name, molecule_index, pmb_type, used_molecules_id): else: # check if a residue is part of another molecule check_residue_name = self.df[self.df['residue_list'].astype(str).str.contains(name)] - pmb_type = self.df.loc[self.df['name']==name].pmb_type.values[0] - if not check_residue_name.empty and pmb_type == pmb_type : + mol_pmb_type = self.df.loc[self.df['name']==name].pmb_type.values[0] + if not check_residue_name.empty and mol_pmb_type == pmb_type: for value in check_residue_name.index.to_list(): if value not in used_molecules_id: molecule_id = self.df.loc[value].molecule_id.values[0] @@ -214,7 +238,7 @@ def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): center_of_mass(`lst`): Coordinates of the center of mass. """ total_beads = 0 - center_of_mass = self.np.zeros(3) + center_of_mass = np.zeros(3) axis_list = [0,1,2] particle_id_list = self.df.loc[self.df['molecule_id']==molecule_id].particle_id.dropna().to_list() for pid in particle_id_list: @@ -244,7 +268,7 @@ def calculate_HH(self, molecule_name, pH_list=None, pka_set=None): - This function should only be used for single-phase systems. For two-phase systems `pmb.calculate_HH_Donnan` should be used. """ if pH_list is None: - pH_list=self.np.linspace(2,12,50) + pH_list=np.linspace(2,12,50) if pka_set is None: pka_set=self.get_pka_set() self.check_pka_set(pka_set=pka_set) @@ -255,7 +279,7 @@ def calculate_HH(self, molecule_name, pH_list=None, pka_set=None): index = self.df.loc[self.df['name'] == molecule_name].index[0].item() residue_list = self.df.at [index,('residue_list','')] sequence = self.df.at [index,('sequence','')] - if self.np.any(self.pd.isnull(sequence)): + if np.any(pd.isnull(sequence)): # Molecule has no sequence for residue in residue_list: list_of_particles_in_residue = self.search_particles_in_residue(residue) @@ -311,7 +335,7 @@ def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): - If `c_macro` does not contain all charged molecules in the system, this function is likely to provide the wrong result. """ if pH_list is None: - pH_list=self.np.linspace(2,12,50) + pH_list=np.linspace(2,12,50) if pka_set is None: pka_set=self.get_pka_set() self.check_pka_set(pka_set=pka_set) @@ -350,7 +374,7 @@ def calc_partition_coefficient(charge, c_macro): charge_density = 0.0 for key in charge: charge_density += charge[key] * c_macro[key] - return (-charge_density / (2 * ionic_strength_res) + self.np.sqrt((charge_density / (2 * ionic_strength_res))**2 + 1)).magnitude + return (-charge_density / (2 * ionic_strength_res) + np.sqrt((charge_density / (2 * ionic_strength_res))**2 + 1)).magnitude for pH_value in pH_list: # calculate the ionic strength of the reservoir @@ -362,15 +386,15 @@ def calc_partition_coefficient(charge, c_macro): #Determine the partition coefficient of positive ions by solving the system of nonlinear, coupled equations #consisting of the partition coefficient given by the ideal Donnan theory and the Henderson-Hasselbalch equation. #The nonlinear equation is formulated for log(xi) since log-operations are not supported for RootResult objects. - equation = lambda logxi: logxi - self.np.log10(calc_partition_coefficient(calc_charges(c_macro, pH_value - logxi), c_macro)) - logxi = self.optimize.root_scalar(equation, bracket=[-1e2, 1e2], method="brentq") + equation = lambda logxi: logxi - np.log10(calc_partition_coefficient(calc_charges(c_macro, pH_value - logxi), c_macro)) + logxi = scipy.optimize.root_scalar(equation, bracket=[-1e2, 1e2], method="brentq") partition_coefficient = 10**logxi.root - charges_temp = calc_charges(c_macro, pH_value-self.np.log10(partition_coefficient)) + charges_temp = calc_charges(c_macro, pH_value-np.log10(partition_coefficient)) for key in c_macro: Z_HH_Donnan[key].append(charges_temp[key]) - pH_system_list.append(pH_value - self.np.log10(partition_coefficient)) + pH_system_list.append(pH_value - np.log10(partition_coefficient)) partition_coefficients_list.append(partition_coefficient) return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} @@ -398,14 +422,14 @@ def truncated_lj_potential(x, epsilon, sigma, cutoff): cutoff_red=cutoff.to('reduced_length').magnitude if bond_type == "harmonic": - r_0 = bond_object.params.get('r_0') - k = bond_object.params.get('k') - l0 = self.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red), x0=r_0).x + r_0 = bond_object.params.get('r_0') + k = bond_object.params.get('k') + l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red), x0=r_0).x elif bond_type == "FENE": - r_0 = bond_object.params.get('r_0') - k = bond_object.params.get('k') - d_r_max = bond_object.params.get('d_r_max') - l0 = self.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*self.np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red), x0=1.0).x + r_0 = bond_object.params.get('r_0') + k = bond_object.params.get('k') + d_r_max = bond_object.params.get('d_r_max') + l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red), x0=1.0).x return l0 def calculate_net_charge (self, espresso_system, molecule_name): @@ -444,7 +468,7 @@ def create_charge_map(espresso_system,id_map,label): net_charge_residues=create_charge_map(label="residue_map", espresso_system=espresso_system, id_map=id_map) - mean_charge=self.np.mean(self.np.array(list(net_charge_molecules.values()))) + mean_charge=np.mean(np.array(list(net_charge_molecules.values()))) return {"mean": mean_charge, "molecules": net_charge_molecules, "residues": net_charge_residues} def center_molecule_in_simulation_box(self, molecule_id, espresso_system): @@ -498,11 +522,11 @@ def check_if_df_cell_has_a_value(self, index,key): Returns: `bool`: `True` if the cell has a value, `False` otherwise. """ - idx = self.pd.IndexSlice + idx = pd.IndexSlice import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore") - return not self.pd.isna(self.df.loc[index, idx[key]]) + return not pd.isna(self.df.loc[index, idx[key]]) def check_if_name_is_defined_in_df(self, name, pmb_type_to_be_defined): """ @@ -518,7 +542,7 @@ def check_if_name_is_defined_in_df(self, name, pmb_type_to_be_defined): if name in self.df['name'].unique(): current_object_type = self.df[self.df['name']==name].pmb_type.values[0] if current_object_type != pmb_type_to_be_defined: - raise ValueError ((f"The name {name} is already defined in the df with a pmb_type = {current_object_type}, pymMBE does not support objects with the same name but different pmb_types")) + raise ValueError (f"The name {name} is already defined in the df with a pmb_type = {current_object_type}, pymMBE does not support objects with the same name but different pmb_types") return True else: return False @@ -535,10 +559,10 @@ def check_pka_set(self, pka_set): for required_key in required_keys: for pka_entry in pka_set.values(): if required_key not in pka_entry.keys(): - raise ValueError('missing a requiered key ', required_keys, 'in the following entry of pka_set', pka_entry) + raise ValueError(f'missing a required key "{required_key}" in the following entry of pka_set "{pka_entry}"') return - def clean_df_row(self, index, columns_keys_to_clean=["particle_id", "particle_id2", "residue_id", "molecule_id"]): + def clean_df_row(self, index, columns_keys_to_clean=("particle_id", "particle_id2", "residue_id", "molecule_id")): """ Cleans the columns of `pmb.df` in `columns_keys_to_clean` of the row with index `index` by assigning them a np.nan value. @@ -547,7 +571,7 @@ def clean_df_row(self, index, columns_keys_to_clean=["particle_id", "particle_id columns_keys_to_clean(`list` of `str`, optional): List with the column keys to be cleaned. Defaults to [`particle_id`, `particle_id2`, `residue_id`, `molecule_id`]. """ for column_key in columns_keys_to_clean: - self.add_value_to_df(key=(column_key,''),index=index,new_value=self.np.nan, verbose=False) + self.add_value_to_df(key=(column_key,''),index=index,new_value=np.nan, verbose=False) return def convert_columns_to_original_format (self, df): @@ -559,8 +583,6 @@ def convert_columns_to_original_format (self, df): """ - from ast import literal_eval - columns_dtype_int = ['particle_id','particle_id2', 'residue_id','molecule_id', 'model',('state_one','es_type'),('state_two','es_type'),('state_one','charge'),('state_two','charge') ] columns_with_units = ['sigma', 'epsilon', 'cutoff', 'offset'] @@ -574,12 +596,12 @@ def convert_columns_to_original_format (self, df): if df[column_name].isnull().all(): df[column_name] = df[column_name].astype(object) else: - df[column_name] = df[column_name].apply(lambda x: literal_eval(str(x)) if self.pd.notnull(x) else x) + df[column_name] = df[column_name].apply(lambda x: ast.literal_eval(str(x)) if pd.notnull(x) else x) for column_name in columns_with_units: - df[column_name] = df[column_name].apply(lambda x: self.create_variable_with_units(x) if self.pd.notnull(x) else x) + df[column_name] = df[column_name].apply(lambda x: self.create_variable_with_units(x) if pd.notnull(x) else x) - df['bond_object'] = df['bond_object'].apply(lambda x: (self.convert_str_to_bond_object(x)) if self.pd.notnull(x) else x) + df['bond_object'] = df['bond_object'].apply(lambda x: self.convert_str_to_bond_object(x) if pd.notnull(x) else x) return df @@ -595,26 +617,18 @@ def convert_str_to_bond_object (self, bond_str): bond_object(`obj`): EsPRESSo bond object """ - from ast import literal_eval from espressomd.interactions import HarmonicBond from espressomd.interactions import FeneBond supported_bonds = ['HarmonicBond', 'FeneBond'] for bond in supported_bonds: - - variable = self.re.subn(f'{bond}', '', bond_str) - + variable = re.subn(f'{bond}', '', bond_str) if variable[1] == 1: - - params = literal_eval(variable[0]) - + params = ast.literal_eval(variable[0]) if bond == 'HarmonicBond': - bond_object = HarmonicBond(r_cut =params['r_cut'], k = params['k'], r_0=params['r_0']) - elif bond == 'FeneBond': - bond_object = FeneBond(k = params['k'], d_r_max =params['d_r_max'], r_0=params['r_0']) return bond_object @@ -638,19 +652,19 @@ def copy_df_entry(self, name, column_name, number_of_copies): df_by_name = self.df.loc[self.df.name == name] if number_of_copies != 1: if df_by_name[column_name].isnull().values.any(): - df_by_name_repeated = self.pd.concat ([df_by_name]*(number_of_copies-1), ignore_index=True) + df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies-1), ignore_index=True) else: df_by_name = df_by_name[df_by_name.index == df_by_name.index.min()] - df_by_name_repeated = self.pd.concat ([df_by_name]*(number_of_copies), ignore_index=True) - df_by_name_repeated[column_name] =self.np.NaN + df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies), ignore_index=True) + df_by_name_repeated[column_name] =np.NaN # Concatenate the new particle rows to `df` - self.df = self.pd.concat ([self.df,df_by_name_repeated], ignore_index=True) + self.df = pd.concat ([self.df,df_by_name_repeated], ignore_index=True) else: if not df_by_name[column_name].isnull().values.any(): df_by_name = df_by_name[df_by_name.index == df_by_name.index.min()] - df_by_name_repeated = self.pd.concat ([df_by_name]*(number_of_copies), ignore_index=True) - df_by_name_repeated[column_name] =self.np.NaN - self.df = self.pd.concat ([self.df,df_by_name_repeated], ignore_index=True) + df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies), ignore_index=True) + df_by_name_repeated[column_name] =np.NaN + self.df = pd.concat ([self.df,df_by_name_repeated], ignore_index=True) return def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt, verbose=True): @@ -775,14 +789,14 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst object_charge[name]=0 for id in object_ids: if espresso_system.part.by_id(id).q > 0: - object_charge['positive']+=1*(self.np.abs(espresso_system.part.by_id(id).q )) + object_charge['positive']+=1*(np.abs(espresso_system.part.by_id(id).q )) elif espresso_system.part.by_id(id).q < 0: - object_charge['negative']+=1*(self.np.abs(espresso_system.part.by_id(id).q )) - if (object_charge['positive'] % abs(anion_charge) == 0): + object_charge['negative']+=1*(np.abs(espresso_system.part.by_id(id).q )) + if object_charge['positive'] % abs(anion_charge) == 0: counterion_number[anion_name]=int(object_charge['positive']/abs(anion_charge)) else: raise ValueError('The number of positive charges in the pmb_object must be divisible by the charge of the anion') - if (object_charge['negative'] % abs(cation_charge) == 0): + if object_charge['negative'] % abs(cation_charge) == 0: counterion_number[cation_name]=int(object_charge['negative']/cation_charge) else: raise ValueError('The number of negative charges in the pmb_object must be divisible by the charge of the cation') @@ -814,15 +828,15 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi molecules_info (`dict`): {molecule_id: {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids": [particle_id1, ...]}}} """ - if list_of_first_residue_positions != None: + if list_of_first_residue_positions is not None: for item in list_of_first_residue_positions: - if isinstance(item, list) == False: - raise ValueError(f"The provided input position is not a nested list. Should be a nested list with elements of 3D lists, corresponding to xyz coord.") + if not isinstance(item, list): + raise ValueError("The provided input position is not a nested list. Should be a nested list with elements of 3D lists, corresponding to xyz coord.") elif len(item) != 3: - raise ValueError(f"The provided input position is formatted wrong. The elements in the provided list does not have 3 coordinates, corresponding to xyz coord.") + raise ValueError("The provided input position is formatted wrong. The elements in the provided list does not have 3 coordinates, corresponding to xyz coord.") if len(list_of_first_residue_positions) != number_of_molecules: - raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}") + raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}") if number_of_molecules <= 0: return if not self.check_if_name_is_defined_in_df(name=name, @@ -836,7 +850,7 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi column_name='molecule_id', number_of_copies=number_of_molecules) - molecules_index = self.np.where(self.df['name']==name) + molecules_index = np.where(self.df['name']==name) molecule_index_list =list(molecules_index[0])[-number_of_molecules:] used_molecules_id = self.df.molecule_id.dropna().drop_duplicates().tolist() pos_index = 0 @@ -845,11 +859,11 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi molecules_info[molecule_id] = {} for residue in residue_list: if first_residue: - if list_of_first_residue_positions == None: + if list_of_first_residue_positions is None: residue_position = None else: for item in list_of_first_residue_positions: - residue_position = [self.np.array(list_of_first_residue_positions[pos_index])] + residue_position = [np.array(list_of_first_residue_positions[pos_index])] # Generate an arbitrary random unit vector backbone_vector = self.generate_random_points_in_a_sphere(center=[0,0,0], radius=1, @@ -933,7 +947,7 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N q = self.df.loc[self.df['name']==name].state_one.charge.values[0] es_type = self.df.loc[self.df['name']==name].state_one.es_type.values[0] # Get a list of the index in `df` corresponding to the new particles to be created - index = self.np.where(self.df['name']==name) + index = np.where(self.df['name']==name) index_list =list(index[0])[-number_of_particles:] # Create the new particles into `espresso_system` created_pid_list=[] @@ -941,7 +955,7 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N df_index=int (index_list[index]) self.clean_df_row(index=df_index) if position is None: - particle_position = self.rng.random((1, 3))[0] *self.np.copy(espresso_system.box_l) + particle_position = self.rng.random((1, 3))[0] *np.copy(espresso_system.box_l) else: particle_position = position[index] if len(espresso_system.part.all()) == 0: @@ -1002,7 +1016,7 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic self.copy_df_entry(name=name,column_name='molecule_id',number_of_copies=number_of_proteins) - protein_index = self.np.where(self.df['name']==name) + protein_index = np.where(self.df['name']==name) protein_index_list =list(protein_index[0])[-number_of_proteins:] used_molecules_id = self.df.molecule_id.dropna().drop_duplicates().tolist() @@ -1019,8 +1033,8 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic for residue in topology_dict.keys(): - residue_name = self.re.split(r'\d+', residue)[0] - residue_number = self.re.split(r'(\d+)', residue)[1] + residue_name = re.split(r'\d+', residue)[0] + residue_number = re.split(r'(\d+)', residue)[1] residue_position = topology_dict[residue]['initial_pos'] position = residue_position + protein_center @@ -1066,7 +1080,7 @@ def create_residue(self, name, espresso_system, number_of_residues, central_bead self.copy_df_entry(name=name, column_name='residue_id', number_of_copies=number_of_residues) - residues_index = self.np.where(self.df['name']==name) + residues_index = np.where(self.df['name']==name) residue_index_list =list(residues_index[0])[-number_of_residues:] # Internal bookkepping of the residue info (important for side-chain residues) # Dict structure {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids":[particle_id1, ...]}} @@ -1080,7 +1094,7 @@ def create_residue(self, name, espresso_system, number_of_residues, central_bead residue_id = self.df['residue_id'].max() + 1 self.add_value_to_df(key=('residue_id',''),index=int (residue_index),new_value=residue_id, verbose=False) # create the principal bead - if self.df.loc[self.df['name']==name].central_bead.values[0] is self.np.NaN: + if self.df.loc[self.df['name']==name].central_bead.values[0] is np.NaN: raise ValueError("central_bead must contain a particle name") central_bead_name = self.df.loc[self.df['name']==name].central_bead.values[0] central_bead_id = self.create_particle(name=central_bead_name, @@ -1099,7 +1113,7 @@ def create_residue(self, name, espresso_system, number_of_residues, central_bead side_chain_beads_ids = [] for side_chain_element in side_chain_list: if side_chain_element not in self.df.values: - raise ValueError (side_chain_element +'is not defined') + raise ValueError (side_chain_element +'is not defined') pmb_type = self.df[self.df['name']==side_chain_element].pmb_type.values[0] if pmb_type == 'particle': bond = self.search_bond(particle_name1=central_bead_name, @@ -1190,15 +1204,15 @@ def create_variable_with_units(self, variable): variable_with_units(`obj`): variable with units using the pyMBE UnitRegistry. """ - if type(variable) is dict: + if isinstance(variable, dict): value=variable.pop('value') units=variable.pop('units') - elif type(variable) is str: + elif isinstance(variable, str): - value = float(self.re.split('\s+', variable)[0]) - units = self.re.split('\s+', variable)[1] + value = float(re.split(r'\s+', variable)[0]) + units = re.split(r'\s+', variable)[1] variable_with_units=value*self.units(units) @@ -1309,9 +1323,9 @@ def define_bond(self, bond_type, bond_parameters, particle_pairs): new_value='bond') self.add_value_to_df(index=index, key=('parameters_of_the_potential',''), - new_value=self.json.dumps(bond_object.get_params())) + new_value=bond_object.get_params(), + non_standard_value=True) - self.df['parameters_of_the_potential'] = self.df['parameters_of_the_potential'].apply (lambda x: eval(str(x)) if self.pd.notnull(x) else x) return @@ -1359,8 +1373,8 @@ def define_default_bond(self, bond_type, bond_parameters, epsilon=None, sigma=No new_value = 'bond') self.add_value_to_df(index = index, key = ('parameters_of_the_potential',''), - new_value = self.json.dumps(bond_object.get_params())) - self.df['parameters_of_the_potential'] = self.df['parameters_of_the_potential'].apply (lambda x: eval(str(x)) if self.pd.notnull(x) else x) + new_value = bond_object.get_params(), + non_standard_value=True) return def define_epsilon_value_of_particles(self, eps_dict): @@ -1518,11 +1532,11 @@ def define_protein(self, name,model, topology_dict): sigma_dict = {} for residue in topology_dict.keys(): - residue_name = self.re.split(r'\d+', residue)[0] + residue_name = re.split(r'\d+', residue)[0] if residue_name not in sigma_dict.keys(): sigma_dict [residue_name] = topology_dict[residue]['sigma'] - if residue_name != 'CA' and residue_name != 'Ca': + if residue_name not in ('CA', 'Ca'): protein_seq_list.append(residue_name) protein_sequence = ''.join(protein_seq_list) @@ -1631,7 +1645,7 @@ def determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res): cOH_res = self.Kw / cH_res cNa_res = None cCl_res = None - if (cOH_res>=cH_res): + if cOH_res>=cH_res: #adjust the concentration of sodium if there is excess OH- in the reservoir: cNa_res = c_salt_res + (cOH_res-cH_res) cCl_res = c_salt_res @@ -1642,11 +1656,11 @@ def determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res): def calculate_concentrations_self_consistently(cH_res, cOH_res, cNa_res, cCl_res): nonlocal max_number_sc_runs, self_consistent_run - if(self_consistent_run=cH_res): + if cOH_res>=cH_res: #adjust the concentration of sodium if there is excess OH- in the reservoir: cNa_res = c_salt_res + (cOH_res-cH_res) cCl_res = c_salt_res @@ -1661,16 +1675,16 @@ def calculate_concentrations_self_consistently(cH_res, cOH_res, cNa_res, cCl_res cH_res, cOH_res, cNa_res, cCl_res = determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res) ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) - determined_pH = -self.np.log10(cH_res.to('mol/L').magnitude * self.np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) + determined_pH = -np.log10(cH_res.to('mol/L').magnitude * np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) while abs(determined_pH-pH_res)>1e-6: - if (determined_pH>pH_res): + if determined_pH > pH_res: cH_res *= 1.005 else: cH_res /= 1.003 cH_res, cOH_res, cNa_res, cCl_res = determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res) ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) - determined_pH = -self.np.log10(cH_res.to('mol/L').magnitude * self.np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) + determined_pH = -np.log10(cH_res.to('mol/L').magnitude * np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) self_consistent_run=0 return cH_res, cOH_res, cNa_res, cCl_res @@ -1729,9 +1743,9 @@ def find_value_from_es_type(self, es_type, column_name): Returns: Value in `pymbe.df` matching `column_name` and `es_type` """ - idx = self.pd.IndexSlice + idx = pd.IndexSlice for state in ['state_one', 'state_two']: - index = self.np.where(self.df[(state, 'es_type')] == es_type)[0] + index = np.where(self.df[(state, 'es_type')] == es_type)[0] if len(index) > 0: if column_name == 'label': label = self.df.loc[idx[index[0]], idx[(state,column_name)]] @@ -1764,7 +1778,7 @@ def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_sample coord = self.generate_random_points_in_a_sphere(center=center, radius=max_dist, n_samples=1)[0] - if self.np.linalg.norm(coord-self.np.asarray(center))>=radius: + if np.linalg.norm(coord-np.asarray(center))>=radius: coord_list.append (coord) counter += 1 return coord_list @@ -1787,17 +1801,17 @@ def generate_random_points_in_a_sphere(self, center, radius, n_samples, on_surfa - Algorithm from: https://baezortega.github.io/2018/10/14/hypersphere-sampling/ """ # initial values - center=self.np.array(center) + center=np.array(center) d = center.shape[0] # sample n_samples points in d dimensions from a standard normal distribution samples = self.rng.normal(size=(n_samples, d)) # make the samples lie on the surface of the unit hypersphere - normalize_radii = self.np.linalg.norm(samples, axis=1)[:, self.np.newaxis] + normalize_radii = np.linalg.norm(samples, axis=1)[:, np.newaxis] samples /= normalize_radii if not on_surface: # make the samples lie inside the hypersphere with the correct density - uniform_points = self.rng.uniform(size=n_samples)[:, self.np.newaxis] - new_radii = self.np.power(uniform_points, 1/d) + uniform_points = self.rng.uniform(size=n_samples)[:, np.newaxis] + new_radii = np.power(uniform_points, 1/d) samples *= new_radii # scale the points to have the correct radius and center samples = samples * radius + center @@ -1814,9 +1828,9 @@ def generate_trial_perpendicular_vector(self,vector,magnitude): Returns: (`lst`): Orthogonal vector with the same magnitude as the input vector. """ - np_vec = self.np.array(vector) - np_vec /= self.np.linalg.norm(np_vec) - if self.np.all(np_vec == 0): + np_vec = np.array(vector) + np_vec /= np.linalg.norm(np_vec) + if np.all(np_vec == 0): raise ValueError('Zero vector') # Generate a random vector random_vector = self.generate_random_points_in_a_sphere(radius=1, @@ -1824,10 +1838,10 @@ def generate_trial_perpendicular_vector(self,vector,magnitude): n_samples=1, on_surface=True)[0] # Project the random vector onto the input vector and subtract the projection - projection = self.np.dot(random_vector, np_vec) * np_vec + projection = np.dot(random_vector, np_vec) * np_vec perpendicular_vector = random_vector - projection # Normalize the perpendicular vector to have the same magnitude as the input vector - perpendicular_vector /= self.np.linalg.norm(perpendicular_vector) + perpendicular_vector /= np.linalg.norm(perpendicular_vector) return perpendicular_vector*magnitude def get_bond_length(self, particle_name1, particle_name2, hard_check=False, use_default_bond=False) : @@ -1857,7 +1871,7 @@ def get_bond_length(self, particle_name1, particle_name2, hard_check=False, use_ else: print("Bond not defined between particles ", particle_name1, " and ", particle_name2) if hard_check: - raise KeyboardInterrupt + sys.exit(1) else: return @@ -1877,9 +1891,9 @@ def get_charge_map(self): df_state_two = self.df.state_two.dropna() else: df_state_two = self.df.state_two - state_one = self.pd.Series (df_state_one.charge.values,index=df_state_one.es_type.values) - state_two = self.pd.Series (df_state_two.charge.values,index=df_state_two.es_type.values) - charge_map = self.pd.concat([state_one,state_two],axis=0).to_dict() + state_one = pd.Series (df_state_one.charge.values,index=df_state_one.es_type.values) + state_two = pd.Series (df_state_two.charge.values,index=df_state_two.es_type.values) + charge_map = pd.concat([state_one,state_two],axis=0).to_dict() return charge_map def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lorentz-Berthelot'): @@ -1908,7 +1922,7 @@ def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lore eps2 = self.df.loc[self.df["name"]==particle_name2].epsilon.iloc[0] sigma2 = self.df.loc[self.df["name"]==particle_name2].sigma.iloc[0] - return {"epsilon": self.np.sqrt(eps1*eps2), "sigma": (sigma1+sigma2)/2.0} + return {"epsilon": np.sqrt(eps1*eps2), "sigma": (sigma1+sigma2)/2.0} def get_particle_id_map(self, object_name): ''' @@ -1977,9 +1991,9 @@ def get_radius_map(self): df_state_one = self.df[[('sigma',''),('state_one','es_type')]].dropna().drop_duplicates() df_state_two = self.df[[('sigma',''),('state_two','es_type')]].dropna().drop_duplicates() - state_one = self.pd.Series(df_state_one.sigma.values/2.0,index=df_state_one.state_one.es_type.values) - state_two = self.pd.Series(df_state_two.sigma.values/2.0,index=df_state_two.state_two.es_type.values) - radius_map = self.pd.concat([state_one,state_two],axis=0).to_dict() + state_one = pd.Series(df_state_one.sigma.values/2.0,index=df_state_one.state_one.es_type.values) + state_two = pd.Series(df_state_two.sigma.values/2.0,index=df_state_two.state_two.es_type.values) + radius_map = pd.concat([state_one,state_two],axis=0).to_dict() return radius_map @@ -2013,9 +2027,9 @@ def get_type_map(self): df_state_two = self.df.state_two.dropna(how='all') else: df_state_two = self.df.state_two - state_one = self.pd.Series (df_state_one.es_type.values,index=df_state_one.label) - state_two = self.pd.Series (df_state_two.es_type.values,index=df_state_two.label) - type_map = self.pd.concat([state_one,state_two],axis=0).to_dict() + state_one = pd.Series (df_state_one.es_type.values,index=df_state_one.label) + state_two = pd.Series (df_state_two.es_type.values,index=df_state_two.label) + type_map = pd.concat([state_one,state_two],axis=0).to_dict() return type_map def load_interaction_parameters(self, filename, verbose=True): @@ -2026,12 +2040,11 @@ def load_interaction_parameters(self, filename, verbose=True): filename(`str`): name of the file to be read verbose (`bool`, optional): Switch to activate/deactivate verbose. Defaults to True. """ - from espressomd import interactions without_units = ['q','es_type','acidity'] with_units = ['sigma','epsilon'] with open(filename, 'r') as f: - interaction_data = self.json.load(f) + interaction_data = json.load(f) interaction_parameter_set = interaction_data["data"] for key in interaction_parameter_set: @@ -2106,7 +2119,7 @@ def load_pka_set(self, filename, verbose=True): - If `name` is already defined in the `pymbe.df`, it prints a warning. """ with open(filename, 'r') as f: - pka_data = self.json.load(f) + pka_data = json.load(f) pka_set = pka_data["data"] self.check_pka_set(pka_set=pka_set) @@ -2209,7 +2222,7 @@ def protein_sequence_parser(self, sequence): "COOH": "c"} clean_sequence=[] if isinstance(sequence, str): - if (sequence.find("-") != -1): + if sequence.find("-") != -1: splited_sequence=sequence.split("-") for residue in splited_sequence: if len(residue) == 1: @@ -2225,7 +2238,7 @@ def protein_sequence_parser(self, sequence): if residue in keys.keys(): clean_sequence.append(keys[residue]) else: - if (residue.upper() in keys.keys()): + if residue.upper() in keys.keys(): clean_sequence.append(keys[residue.upper()]) else: raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") @@ -2241,16 +2254,16 @@ def protein_sequence_parser(self, sequence): clean_sequence.append(residue_ok) if isinstance(sequence, list): for residue in sequence: - if residue in keys.values(): - residue_ok=residue + if residue in keys.values(): + residue_ok=residue + else: + if residue.upper() in keys.values(): + residue_ok=residue.upper() + elif (residue.upper() in keys.keys()): + clean_sequence.append(keys[residue.upper()]) else: - if residue.upper() in keys.values(): - residue_ok=residue.upper() - elif (residue.upper() in keys.keys()): - clean_sequence.append(keys[residue.upper()]) - else: - raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") - clean_sequence.append(residue_ok) + raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") + clean_sequence.append(residue_ok) return clean_sequence def read_pmb_df (self,filename): @@ -2264,12 +2277,12 @@ def read_pmb_df (self,filename): This function only accepts files with CSV format. """ - if filename[-3:] != "csv": + if filename.rsplit(".", 1)[1] != "csv": raise ValueError("Only files with CSV format are supported") - df = self.pd.read_csv (filename,header=[0, 1], index_col=0) + df = pd.read_csv (filename,header=[0, 1], index_col=0) columns_names = self.setup_df() - multi_index = self.pd.MultiIndex.from_tuples(columns_names) + multi_index = pd.MultiIndex.from_tuples(columns_names) df.columns = multi_index self.df = self.convert_columns_to_original_format(df) @@ -2296,32 +2309,26 @@ def read_protein_vtf_in_df (self,filename,unit_length=None): coord_list = [] particles_dict = {} - if unit_length == None: + if unit_length is None: unit_length = 1 * self.units.angstrom with open (filename,'r') as protein_model: - - for line in protein_model : - line_split = line.split () - - if line_split : - line_header = line_split [0] - - if line_header == 'atom': - - atom_id = line_split [1] - atom_name = line_split [3] - atom_resname = line_split [5] - chain_id = line_split [9] - radius = float(line_split [11])*unit_length - sigma = 2*radius - particles_dict [int(atom_id)] = [atom_name , atom_resname, chain_id, sigma] - - elif line_header.isnumeric (): - - atom_coord = line_split [1:] - atom_coord = [(float(i)*unit_length).to('reduced_length').magnitude for i in atom_coord] - coord_list.append (atom_coord) + for line in protein_model : + line_split = line.split() + if line_split: + line_header = line_split[0] + if line_header == 'atom': + atom_id = line_split[1] + atom_name = line_split[3] + atom_resname = line_split[5] + chain_id = line_split[9] + radius = float(line_split [11])*unit_length + sigma = 2*radius + particles_dict [int(atom_id)] = [atom_name , atom_resname, chain_id, sigma] + elif line_header.isnumeric(): + atom_coord = line_split[1:] + atom_coord = [(float(i)*unit_length).to('reduced_length').magnitude for i in atom_coord] + coord_list.append (atom_coord) numbered_label = [] i = 0 @@ -2391,7 +2398,7 @@ def search_bond(self, particle_name1, particle_name2, hard_check=False, use_defa else: print("Bond not defined between particles ", particle_name1, " and ", particle_name2) if hard_check: - exit() + sys.exit(1) else: return @@ -2518,7 +2525,7 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None - If no `unit_charge` is given, a value of 1 elementary charge is assumed by default. - If no `Kw` is given, a value of 10^(-14) * mol^2 / l^2 is assumed by default. """ - self.units=self.pint.UnitRegistry() + self.units=pint.UnitRegistry() if unit_length is None: unit_length=0.355*self.units.nm if temperature is None: @@ -2913,8 +2920,8 @@ def setup_grxmc_unified (self, pH_res, c_salt_res, cation_name, anion_name, acti ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) determined_activity_coefficient = activity_coefficient(ionic_strength_res) a_hydrogen = (10 ** (-pH_res) * self.units.mol/self.units.l).to('1/(N_A * reduced_length**3)') - a_cation = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * self.np.sqrt(determined_activity_coefficient) - a_anion = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * self.np.sqrt(determined_activity_coefficient) + a_cation = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * np.sqrt(determined_activity_coefficient) + a_anion = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * np.sqrt(determined_activity_coefficient) K_XX = a_cation * a_anion cation_es_type = self.df.loc[self.df['name']==cation_name].state_one.es_type.values[0] @@ -3034,13 +3041,13 @@ def setup_df (self): '': float}, } - self.df = self.pd.DataFrame(columns=self.pd.MultiIndex.from_tuples([(col_main, col_sub) for col_main, sub_cols in columns_dtypes.items() for col_sub in sub_cols.keys()])) + self.df = pd.DataFrame(columns=pd.MultiIndex.from_tuples([(col_main, col_sub) for col_main, sub_cols in columns_dtypes.items() for col_sub in sub_cols.keys()])) for level1, sub_dtypes in columns_dtypes.items(): for level2, dtype in sub_dtypes.items(): self.df[level1, level2] = self.df[level1, level2].astype(dtype) - columns_names = self.pd.MultiIndex.from_frame(self.df) + columns_names = pd.MultiIndex.from_frame(self.df) columns_names = columns_names.names return columns_names @@ -3061,7 +3068,6 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining - Check the documentation of ESPResSo for more info about the potential https://espressomd.github.io/doc4.2.0/inter_non-bonded.html """ - from ast import literal_eval from itertools import combinations_with_replacement implemented_combining_rules = ['Lorentz-Berthelot'] compulsory_parameters_in_df = ['sigma','epsilon'] @@ -3080,7 +3086,7 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining for key in compulsory_parameters_in_df: value_in_df=self.find_value_from_es_type(es_type=particle_type, column_name=key) - check_list.append(self.np.isnan(value_in_df)) + check_list.append(np.isnan(value_in_df)) if any(check_list): non_parametrized_labels.append(self.find_value_from_es_type(es_type=particle_type, column_name='label')) @@ -3097,14 +3103,14 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining for key in lj_parameters_keys: lj_parameters[key].append(self.find_value_from_es_type(es_type=ptype, column_name=key)) # If one of the particle has sigma=0, no LJ interations are set up between that particle type and the others - if not all([ sigma_value.magnitude for sigma_value in lj_parameters["sigma"]]): + if not all(sigma_value.magnitude for sigma_value in lj_parameters["sigma"]): continue # Apply combining rule if combining_rule == 'Lorentz-Berthelot': sigma=(lj_parameters["sigma"][0]+lj_parameters["sigma"][1])/2 cutoff=(lj_parameters["cutoff"][0]+lj_parameters["cutoff"][1])/2 offset=(lj_parameters["offset"][0]+lj_parameters["offset"][1])/2 - epsilon=self.np.sqrt(lj_parameters["epsilon"][0]*lj_parameters["epsilon"][1]) + epsilon=np.sqrt(lj_parameters["epsilon"][0]*lj_parameters["epsilon"][1]) espresso_system.non_bonded_inter[type_pair[0],type_pair[1]].lennard_jones.set_params(epsilon = epsilon.to('reduced_energy').magnitude, sigma = sigma.to('reduced_length').magnitude, cutoff = cutoff.to('reduced_length').magnitude, @@ -3120,11 +3126,11 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining self.add_value_to_df(index=index, key=('parameters_of_the_potential',''), - new_value=self.json.dumps(lj_params)) + new_value=lj_params, + non_standard_value=True) if non_parametrized_labels and warnings: print(f'WARNING: The following particles do not have a defined value of sigma or epsilon in pmb.df: {non_parametrized_labels}. No LJ interaction has been added in ESPResSo for those particles.') - self.df['parameters_of_the_potential'] = self.df['parameters_of_the_potential'].apply (lambda x: eval(str(x)) if self.pd.notnull(x) else x) return def setup_particle_sigma(self, topology_dict): @@ -3135,8 +3141,8 @@ def setup_particle_sigma(self, topology_dict): topology_dict(`dict`): {'initial_pos': coords_list, 'chain_id': id, 'sigma': sigma_value} ''' for residue in topology_dict.keys(): - residue_name = self.re.split(r'\d+', residue)[0] - residue_number = self.re.split(r'(\d+)', residue)[1] + residue_name = re.split(r'\d+', residue)[0] + residue_number = re.split(r'(\d+)', residue)[1] residue_sigma = topology_dict[residue]['sigma'] sigma = residue_sigma*self.units.nm index = self.df[(self.df['residue_id']==residue_number) & (self.df['name']==residue_name) ].index.values[0] @@ -3172,7 +3178,7 @@ def write_output_vtf_file(self, espresso_system, filename): for particle in espresso_system.part: type_label = self.find_value_from_es_type(es_type=particle.type, column_name='label') coordinates.write (f'atom {particle.id} radius 1 name {type_label} type {type_label}\n' ) - coordinates.write (f'timestep indexed\n') + coordinates.write ('timestep indexed\n') for particle in espresso_system.part: coordinates.write (f'{particle.id} \t {particle.pos[0]} \t {particle.pos[1]} \t {particle.pos[2]}\n') return diff --git a/samples/Beyer2024/README.md b/samples/Beyer2024/README.md index 15eadbe..b29606e 100644 --- a/samples/Beyer2024/README.md +++ b/samples/Beyer2024/README.md @@ -1,13 +1,15 @@ -The scripts in this folder are designed to reproduce the data and plots showcased in our publication [1]. +The scripts in this folder are designed to reproduce the data and plots showcased in our publication [^1]. To reproduce the data, one simply needs to run the following script: -```bash +```sh python3 create_paper_data.py --fig_label 7a --mode long-run --plot ``` -where the previous line will run the script to produce Fig. 7a in Ref.[1] The user can use the argparse argument `--fig_label` to create any of the plots that we presented in that publication as benchmarks: 7a, 7b, 7c, 8a, 8b, 9. The argparse `--mode` controls the statiscal accuracy (i.e. the number of samples) that the script measures. The mode `long-run` should be used to generate data with the same statistical accuracy than in Ref.[1]. The mode `short-run` can be used for a shorter run for testing or to trying out the scripts for each of our benchmarks: +where the previous line will run the script to produce Fig. 7a in Ref.[^1] The user can use the argparse argument `--fig_label` to create any of the plots that we presented in that publication as benchmarks: 7a, 7b, 7c, 8a, 8b, 9. The argparse `--mode` controls the statiscal accuracy (i.e. the number of samples) that the script measures. The mode `long-run` should be used to generate data with the same statistical accuracy than in Ref.[^1]. The mode `short-run` can be used for a shorter run for testing or to trying out the scripts for each of our benchmarks: + - peptide.py: for the peptide benchmarks - globular_protein.py: for the globular protein benchmarks - weak_polyelectrolyte_dialysis.py: for the weak polyelectrolyte dialysis benchmarks -The optional argparse argument `--plot` controls if the scripts generates the corresponding plot or if the data is simply stored to file. We note that the format of the plots can differ from that of our publication [1]. This scripts are part of the continous integration (CI) scheme of the pyMBE library and they are used to ensure that any stable version of the library reproduces the benchmarks. + +The optional argparse argument `--plot` controls if these scripts generate the corresponding plot or if the data is simply stored to file. We note that the format of the plots can differ from that of our publication [^1]. Theses scripts are part of the continous integration (CI) scheme of the pyMBE library and they are used to ensure that any stable version of the library reproduces the benchmarks. -[1] Beyer, D., Torres, P. B., Pineda, S. P., Narambuena, C. F., Grad, J. N., Košovan, P., & Blanco, P. M. (2024). pyMBE: the Python-based Molecule Builder for ESPResSo. arXiv preprint [arXiv:2401.14954](https://arxiv.org/abs/2401.14954). \ No newline at end of file +[^1]: Beyer, D., Torres, P. B., Pineda, S. P., Narambuena, C. F., Grad, J.-N., Košovan, P., & Blanco, P. M. (2024). pyMBE: the Python-based Molecule Builder for ESPResSo. arXiv preprint [arXiv:2401.14954](https://arxiv.org/abs/2401.14954). diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py index 5c656a7..45e2f9e 100644 --- a/samples/Beyer2024/create_paper_data.py +++ b/samples/Beyer2024/create_paper_data.py @@ -1,7 +1,8 @@ # Import pyMBE and other libraries import pyMBE from lib import analysis -import os +import os +import sys import numpy as np import argparse import subprocess @@ -39,8 +40,8 @@ ## Peptide plots (Fig. 7) labels_fig7=["7a", "7b", "7c"] -if fig_label in labels_fig7 and not plot: - script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") +if fig_label in labels_fig7: + script_path=pmb.get_resource("samples/Beyer2024/peptide.py") if fig_label == "7a": sequence="K"*5+"D"*5 elif fig_label == "7b": @@ -51,7 +52,7 @@ raise RuntimeError() pH_range = np.linspace(2, 12, num=21) for pH in pH_range: - run_command=["python3", script_path, "--sequence", sequence, "--pH", str(pH), "--mode", mode] + run_command=[sys.executable, script_path, "--sequence", sequence, "--pH", str(pH), "--mode", mode] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) @@ -60,17 +61,20 @@ ## Protein plots (Fig. 8) labels_fig8=["8a", "8b"] + if fig_label in labels_fig8: - script_path=pmb.get_resource(f"samples/Beyer2024/globular_protein.py") + script_path=pmb.get_resource("samples/Beyer2024/globular_protein.py") pH_range = np.linspace(2, 7, num=11) + run_command_common=[sys.executable, script_path, "--mode", mode, "--no_verbose"] if fig_label == "8a": + protein_pdb = "1f6s" path_to_cg = f"parameters/globular_proteins/{protein_pdb}.vtf" for pH in pH_range: - run_command=["python3", script_path, "--pdb", protein_pdb, "--pH", str(pH), "--path_to_cg", path_to_cg, "--mode", mode, "--no_verbose", "--metal_ion_name", "Ca", "--metal_ion_charge", str(2)] + run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", path_to_cg, "--metal_ion_name", "Ca", "--metal_ion_charge", str(2)] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) @@ -78,7 +82,7 @@ protein_pdb = "1beb" path_to_cg = f"parameters/globular_proteins/{protein_pdb}.vtf" for pH in pH_range: - run_command=["python3", script_path, "--pdb", protein_pdb, "--pH", str(pH), "--path_to_cg", path_to_cg, "--mode", mode, "--no_verbose"] + run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", path_to_cg] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) else: @@ -86,24 +90,24 @@ ## Weak polyelectrolyte dialysis plot (Fig. 9) -if fig_label == "9" and not plot: - script_path=pmb.get_resource(f"samples/Beyer2024/weak_polyelectrolyte_dialysis.py") +if fig_label == "9": + script_path=pmb.get_resource("samples/Beyer2024/weak_polyelectrolyte_dialysis.py") pH_range = np.linspace(1, 13, num=13) c_salt_res = 0.01 * pmb.units.mol/pmb.units.L for pH in pH_range: - run_command=["python3", script_path, "--c_salt_res", str(0.01), "--c_mon_sys", str(0.435), "--pH_res", str(pH), "--pKa_value", str(4.0), "--mode", mode] + run_command=[sys.executable, script_path, "--c_salt_res", str(0.01), "--c_mon_sys", str(0.435), "--pH_res", str(pH), "--pKa_value", str(4.0), "--mode", mode] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series if fig_label in labels_fig7: - time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series/peptides") + time_series_folder_path=pmb.get_resource("samples/Beyer2024/time_series/peptides") if fig_label in labels_fig8: - time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series/globular_protein") + time_series_folder_path=pmb.get_resource("samples/Beyer2024/time_series/globular_protein") if fig_label == "9": - time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series/grxmc") + time_series_folder_path=pmb.get_resource("samples/Beyer2024/time_series/grxmc") data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path) @@ -177,6 +181,7 @@ # Calculate and plot Henderson-Hasselbalch (HH) if fig_label in labels_fig7: + pmb.define_peptide (name=sequence, sequence=sequence, model="1beadAA") @@ -210,6 +215,7 @@ Z_HH = pmb.calculate_HH(molecule_name=protein_pdb, pH_list=pH_range_HH) + print (Z_HH) # Plot HH plt.plot(pH_range_HH, Z_HH, diff --git a/samples/Beyer2024/globular_protein.py b/samples/Beyer2024/globular_protein.py index f992833..04c5217 100644 --- a/samples/Beyer2024/globular_protein.py +++ b/samples/Beyer2024/globular_protein.py @@ -2,6 +2,7 @@ from tqdm import tqdm import espressomd import argparse +import numpy as np import pandas as pd # Create an instance of pyMBE library @@ -14,10 +15,10 @@ from lib.handy_functions import setup_langevin_dynamics from lib import analysis # Here you can adjust the width of the panda columns displayed when running the code -pmb.pd.options.display.max_colwidth = 10 +pd.options.display.max_colwidth = 10 #This line allows you to see the complete amount of rows in the dataframe -pmb.pd.set_option('display.max_rows', None) +pd.set_option('display.max_rows', None) valid_modes=["short-run","long-run", "test"] @@ -214,11 +215,11 @@ # Setup the potential energy -if (WCA): +if WCA: pmb.setup_lj_interactions (espresso_system=espresso_system) minimize_espresso_system_energy (espresso_system=espresso_system) - if (Electrostatics): + if Electrostatics: setup_electrostatic_interactions (units=pmb.units, espresso_system=espresso_system, @@ -233,7 +234,7 @@ kT = pmb.kT, SEED = LANGEVIN_SEED) -observables_df = pmb.pd.DataFrame() +observables_df = pd.DataFrame() time_step = [] net_charge_list = [] @@ -285,7 +286,7 @@ if label in AA_label_list: charge_residues_per_type[label].append(charge_residues[amino]) - if (step % stride_traj == 0 ): + if step % stride_traj == 0 : n_frame +=1 pmb.write_output_vtf_file(espresso_system=espresso_system, filename=f"frames/trajectory{n_frame}.vtf") @@ -295,7 +296,7 @@ time_series["charge"].append(charge_dict["mean"]) for label in AA_label_list: - charge_amino = pmb.np.mean(charge_residues_per_type[label]) + charge_amino = np.mean(charge_residues_per_type[label]) time_series[label].append(charge_amino) data_path = args.output @@ -312,6 +313,3 @@ if verbose: print("*** DONE ***") - - - diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py index 669b14b..5df3108 100644 --- a/samples/Beyer2024/peptide.py +++ b/samples/Beyer2024/peptide.py @@ -1,15 +1,9 @@ # Load espresso, sugar and other necessary libraries -import sys import os -import inspect import espressomd -import numpy as np import pandas as pd import argparse import tqdm -from espressomd.io.writer import vtf -from espressomd import interactions -from espressomd import electrostatics # Import pyMBE import pyMBE @@ -227,4 +221,3 @@ if verbose: print("*** DONE ***") - diff --git a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py index faf73bd..f2fefcd 100644 --- a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py +++ b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py @@ -3,18 +3,13 @@ ####################################################### # Load python modules -import sys import os -import inspect import espressomd import numpy as np import pandas as pd from tqdm import tqdm -from espressomd import interactions -from espressomd import electrostatics from scipy import interpolate import argparse -import pickle # Import pyMBE import pyMBE diff --git a/samples/branched_polyampholyte.py b/samples/branched_polyampholyte.py index 8b1daa2..d651ef5 100644 --- a/samples/branched_polyampholyte.py +++ b/samples/branched_polyampholyte.py @@ -1,15 +1,11 @@ # Load espresso, pyMBE and other necessary libraries -import sys import os -import inspect -from matplotlib.style import use import espressomd import numpy as np import matplotlib.pyplot as plt import argparse import pandas as pd from espressomd.io.writer import vtf -from espressomd import interactions import pyMBE # Create an instance of pyMBE library @@ -98,10 +94,10 @@ # Define bonds bond_type = 'harmonic' -generic_bond_lenght=0.4 * pmb.units.nm +generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -harmonic_bond = {'r_0' : generic_bond_lenght, +harmonic_bond = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant, } @@ -176,10 +172,8 @@ # Main loop for performing simulations at different pH-values labels_obs=["time","charge"] -for index in range(len(pH_range)): +for pH_value in pH_range: - pH_value=pH_range[index] - time_series={} for label in labels_obs: @@ -196,7 +190,7 @@ else: RE.reaction( reaction_steps = total_ionisible_groups) - if ( step > steps_eq): + if step > steps_eq: # Get polyampholyte net charge charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, molecule_name="polyampholyte") @@ -206,9 +200,9 @@ time_series["time"].append(espresso_system.time) time_series["charge"].append(charge_dict["mean"]) - if (step % N_samples_print == 0) : + if step % N_samples_print == 0: N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: + with open(f'frames/trajectory{N_frame}.vtf', mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) @@ -216,7 +210,7 @@ processed_data = block_analyze(full_data=pd.DataFrame(time_series, columns=labels_obs)) Z_pH.append(processed_data["mean", "charge"]) err_Z_pH.append(processed_data["err_mean", "charge"]) - print("pH = {:6.4g} done".format(pH_value)) + print(f"pH = {pH_value:6.4g} done") if args.test: diff --git a/samples/peptide.py b/samples/peptide.py index 7d835bf..c48e517 100644 --- a/samples/peptide.py +++ b/samples/peptide.py @@ -1,15 +1,10 @@ # Load espresso, pyMBE and other necessary libraries -import sys import os -import inspect -from matplotlib.style import use import espressomd import numpy as np import matplotlib.pyplot as plt from tqdm import tqdm from espressomd.io.writer import vtf -from espressomd import interactions -from espressomd import electrostatics import pyMBE # Create an instance of pyMBE library @@ -39,7 +34,7 @@ # Peptide parameters -sequence = 'nEEEEEc' +sequence = 'EEEEDDDD' model = '2beadAA' # Model with 2 beads per each aminoacid pep_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L N_peptide_chains = 4 @@ -51,36 +46,10 @@ pmb.load_interaction_parameters (filename=path_to_interactions) pmb.load_pka_set (path_to_pka) -# Use a generic parametrization for the aminoacids not parametrized - -not_parametrized_neutral_aminoacids = ['A','N','Q','G','I','L','M','F','P','O','S','U','T','W','V','J'] -not_parametrized_acidic_aminoacids = ['C','c'] -not_parametrized_basic_aminoacids = ['R','n'] - -already_defined_AA=[] - -for aminoacid_key in sequence: - if aminoacid_key in already_defined_AA: - continue - if aminoacid_key in not_parametrized_acidic_aminoacids: - pmb.define_particle(name=aminoacid_key, - acidity='acidic', - sigma=0.35*pmb.units.nm, - epsilon=1*pmb.units('reduced_energy')) - elif aminoacid_key in not_parametrized_basic_aminoacids: - pmb.define_particle(name=aminoacid_key, acidity='basic',sigma=0.35*pmb.units.nm,epsilon=1*pmb.units('reduced_energy')) - - elif aminoacid_key in not_parametrized_neutral_aminoacids: - pmb.define_particle(name=aminoacid_key, - q=0, - sigma=0.35*pmb.units.nm, - epsilon=1*pmb.units('reduced_energy')) - already_defined_AA.append(aminoacid_key) - -generic_bond_lenght=0.4 * pmb.units.nm +generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -HARMONIC_parameters = {'r_0' : generic_bond_lenght, +HARMONIC_parameters = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant, } @@ -186,25 +155,25 @@ else: RE.reaction( reaction_steps = total_ionisible_groups) - if ( step > steps_eq): + if step > steps_eq: # Get peptide net charge charge_dict=pmb.calculate_net_charge ( espresso_system=espresso_system, molecule_name=peptide_name) Z_sim.append(charge_dict["mean"]) - if (step % N_samples_print == 0) : + if step % N_samples_print == 0: N_frame+=1 with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) Z_pH.append(Z_sim) - print("pH = {:6.4g} done".format(pH_value)) + print(f"pH = {pH_value:6.4g} done") # Estimate the statistical error and the autocorrelation time of the data print("Net charge analysis") -av_net_charge, err_net_charge, tau_net_charge, block_size_net_charge = block_analyze(input_data=Z_pH) +av_net_charge, err_net_charge, tau_net_charge, block_size_net_charge = block_analyze(full_data=Z_pH) # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation Z_HH = pmb.calculate_HH(molecule_name=peptide_name, @@ -216,6 +185,6 @@ plt.legend() plt.xlabel('pH') plt.ylabel('Charge of the peptide / e') -plt.title('Peptide sequence: '+ sequence) +plt.title(f'Peptide sequence: {sequence}') plt.show() diff --git a/samples/peptide_mixture_grxmc_ideal.py b/samples/peptide_mixture_grxmc_ideal.py index e83f369..4382f6b 100644 --- a/samples/peptide_mixture_grxmc_ideal.py +++ b/samples/peptide_mixture_grxmc_ideal.py @@ -1,16 +1,11 @@ #Load espresso, pyMBE and other necessary libraries -import sys import os -import inspect -from matplotlib.style import use import espressomd import numpy as np import matplotlib.pyplot as plt import pandas as pd import argparse from espressomd.io.writer import vtf -from espressomd import interactions -from espressomd import electrostatics import pyMBE # Create an instance of pyMBE library @@ -36,14 +31,13 @@ args = parser.parse_args() if args.mode not in valid_modes: - raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") + raise ValueError(f"Mode {args.mode} is not currently supported, valid modes are {valid_modes}") # The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' if not os.path.exists('./frames'): os.makedirs('./frames') #Import functions from handy_functions script -from lib.handy_functions import minimize_espresso_system_energy from lib.analysis import block_analyze # Simulation parameters @@ -95,10 +89,10 @@ # Defines the bonds bond_type = 'harmonic' -generic_bond_lenght=0.4 * pmb.units.nm +generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -harmonic_bond = {'r_0' : generic_bond_lenght, +harmonic_bond = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant, } @@ -214,10 +208,8 @@ # Main loop for performing simulations at different pH-values labels_obs=["time","charge","num_plus"] -for index in range(len(pH_range)): +for pH_value in pH_range: - pH_value=pH_range[index] - time_series={} for label in labels_obs: @@ -237,7 +229,7 @@ else: RE.reaction(reaction_steps = total_ionisible_groups) - if (step > steps_eq): + if step > steps_eq: # Get peptide net charge z_one_object=0 for pid in particle_id_list: @@ -254,9 +246,9 @@ elif args.mode == 'unified': time_series["num_plus"].append(espresso_system.number_of_particles(type=type_map["Na"])) - if (step % N_samples_print == 0) : + if step % N_samples_print == 0: N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: + with open(f'frames/trajectory{N_frame}.vtf', mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) @@ -269,7 +261,7 @@ err_concentration_plus = (processed_data["err_mean", "num_plus"]/(pmb.N_A * L**3)).to('mol/L') xi_plus.append((concentration_plus/ionic_strength_res).magnitude) err_xi_plus.append(err_concentration_plus/ionic_strength_res) - print("pH = {:6.4g} done".format(pH_value)) + print(f"pH = {pH_value:6.4g} done") if args.test: diff --git a/samples/plot_HH.py b/samples/plot_HH.py index 697e26d..f3757ed 100644 --- a/samples/plot_HH.py +++ b/samples/plot_HH.py @@ -1,8 +1,5 @@ # Plots the charge of a peptide with a given sequence using either a custom or reference set of pKa values -import os -import sys -import inspect import numpy as np import matplotlib.pyplot as plt @@ -29,7 +26,8 @@ model="1beadAA") if load_pka_set_from_file: - pka_set=pmb.load_pka_set(filename=path_to_pka) + pka_set=None + pmb.load_pka_set(filename=path_to_pka) print('pka_set stored in pyMBE: ', pmb.get_pka_set()) else: pka_set=custom_pka_set diff --git a/samples/salt_solution_gcmc.py b/samples/salt_solution_gcmc.py index feb6d26..42c8b57 100644 --- a/samples/salt_solution_gcmc.py +++ b/samples/salt_solution_gcmc.py @@ -1,18 +1,11 @@ # Load python modules -import sys import os -import inspect -from matplotlib.style import use import espressomd import numpy as np -import matplotlib.pyplot as plt import pandas as pd from scipy import interpolate import argparse from tqdm import tqdm -from espressomd.io.writer import vtf -from espressomd import interactions -from espressomd import electrostatics # Import pyMBE import pyMBE @@ -21,7 +14,6 @@ # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -import warnings # Command line arguments @@ -55,7 +47,6 @@ #Import functions from handy_functions script from lib.handy_functions import minimize_espresso_system_energy -from lib.analysis import block_analyze from lib.handy_functions import setup_electrostatic_interactions from lib.handy_functions import setup_langevin_dynamics @@ -191,4 +182,4 @@ particle_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list() #Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df(filename='df.csv') +pmb.write_pmb_df(filename=f'{data_path}/df.csv') diff --git a/testsuite/bond_tests.py b/testsuite/bond_tests.py index acde00e..fdd196d 100644 --- a/testsuite/bond_tests.py +++ b/testsuite/bond_tests.py @@ -1,36 +1,36 @@ # Import pyMBE and other libraries import pyMBE import numpy as np - + # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) def check_bond_setup(bond_object, input_parameters, bond_type): - """ - Checks that pyMBE sets up a bond object correctly. - - Args: - bond_object(`espressomd.interactions`): instance of a espresso bond object. - input_parameters(`dict`): dictionary with the parameters for the bond potential. - bond_type(`str`): label identifying the bond type - """ - # Check that pyMBE stores the correct type of bond - if bond_type.lower() not in str(bond_object).lower(): - raise Exception("*** Test failed: pyMBE does not store the correct type of bond ***") - # Check that pyMBE defines the bond with the correct parameters - bond_params = bond_object.get_params() - reduced_units = {'r_0' : 'reduced_length', - 'k' : 'reduced_energy / reduced_length**2', - 'd_r_max': 'reduced_length'} - for key in input_parameters.keys(): - np.testing.assert_equal(actual=bond_params[key], - desired=input_parameters[key].m_as(reduced_units[key]), - verbose=True) - return 0 + """ + Checks that pyMBE sets up a bond object correctly. + + Args: + bond_object(`espressomd.interactions`): instance of a espresso bond object. + input_parameters(`dict`): dictionary with the parameters for the bond potential. + bond_type(`str`): label identifying the bond type + """ + # Check that pyMBE stores the correct type of bond + if bond_type.lower() not in str(bond_object).lower(): + raise Exception("*** Test failed: pyMBE does not store the correct type of bond ***") + # Check that pyMBE defines the bond with the correct parameters + bond_params = bond_object.get_params() + reduced_units = {'r_0' : 'reduced_length', + 'k' : 'reduced_energy / reduced_length**2', + 'd_r_max': 'reduced_length'} + for key in input_parameters.keys(): + np.testing.assert_equal(actual=bond_params[key], + desired=input_parameters[key].m_as(reduced_units[key]), + verbose=True) + return 0 #### DEFINE_BOND TESTS ### -print(f"*** Unit test: check that define_bond sets up a harmonic bond correctly ***") +print("*** Unit test: check that define_bond sets up a harmonic bond correctly ***") # Define dummy particle @@ -55,7 +55,7 @@ def check_bond_setup(bond_object, input_parameters, bond_type): # Clean pmb.df pmb.setup_df() -print(f"*** Unit test: check that define_bond sets up a FENE bond correctly ***") +print("*** Unit test: check that define_bond sets up a FENE bond correctly ***") # Define dummy particle @@ -84,7 +84,7 @@ def check_bond_setup(bond_object, input_parameters, bond_type): pmb.setup_df() -print(f"*** Unit test: check that define_bond sets up a harmonic and a FENE bonds correctly in the same script ***") +print("*** Unit test: check that define_bond sets up a harmonic and a FENE bonds correctly in the same script ***") # Define dummy particle @@ -124,7 +124,7 @@ def check_bond_setup(bond_object, input_parameters, bond_type): pmb.setup_df() -print(f"*** Unit test: check that define_bond raises a ValueError if the provided bond_type differs from the two currently implemented (harmonic and FENE) ***") +print("*** Unit test: check that define_bond raises a ValueError if the provided bond_type differs from the two currently implemented (harmonic and FENE) ***") # Define dummy particle @@ -143,9 +143,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Equilibrium length (r_0) for setting up a harmonic bond ***") +print("*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Equilibrium length (r_0) for setting up a harmonic bond ***") # Define dummy particle @@ -163,9 +163,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a harmonic bond ***") +print("*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a harmonic bond ***") # Define dummy particle @@ -183,9 +183,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a FENE bond ***") +print("*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a FENE bond ***") # Define dummy particle @@ -204,9 +204,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Maximal stretching length (d_r_max) for setting up a FENE bond ***") +print("*** Unit test: check that define_bond raises a ValueError if the provided dictionary does not have the Maximal stretching length (d_r_max) for setting up a FENE bond ***") # Define dummy particle @@ -225,11 +225,11 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") #### DEFINE_DEFAULT_BOND TESTS ### -print(f"*** Unit test: check that define_default_bond sets up a harmonic bond correctly ***") +print("*** Unit test: check that define_default_bond sets up a harmonic bond correctly ***") # Define dummy bond @@ -250,7 +250,7 @@ def check_bond_setup(bond_object, input_parameters, bond_type): pmb.setup_df() -print(f"*** Unit test: check that define_dafult_bond sets up a FENE bond correctly ***") +print("*** Unit test: check that define_dafult_bond sets up a FENE bond correctly ***") # Define dummy bond @@ -273,7 +273,7 @@ def check_bond_setup(bond_object, input_parameters, bond_type): pmb.setup_df() -print(f"*** Unit test: check that define_default_bond raises a ValueError if the provided bond_type differs from the two currently implemented (harmonic and FENE) ***") +print("*** Unit test: check that define_default_bond raises a ValueError if the provided bond_type differs from the two currently implemented (harmonic and FENE) ***") # Define dummy bond @@ -287,9 +287,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_default_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Equilibrium length (r_0) for setting up a harmonic bond ***") +print("*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Equilibrium length (r_0) for setting up a harmonic bond ***") # Define dummy bond @@ -302,9 +302,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_default_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a harmonic bond ***") +print("*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a harmonic bond ***") # Define dummy bond @@ -317,9 +317,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_default_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a FENE bond ***") +print("*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Magnitud of the potential (k) for setting up a FENE bond ***") # Define dummy bond @@ -333,9 +333,9 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_default_bond, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Maximal stretching length (d_r_max) for setting up a FENE bond ***") +print("*** Unit test: check that define_default_bond raises a ValueError if the provided dictionary does not have the Maximal stretching length (d_r_max) for setting up a FENE bond ***") # Define dummy bond @@ -349,5 +349,4 @@ def check_bond_setup(bond_object, input_parameters, bond_type): np.testing.assert_raises(ValueError, pmb.define_default_bond, **input_parameters) -print(f"*** Unit test passed ***") - +print("*** Unit test passed ***") diff --git a/testsuite/cph_ideal_tests.py b/testsuite/cph_ideal_tests.py index 8d55113..5e3d888 100644 --- a/testsuite/cph_ideal_tests.py +++ b/testsuite/cph_ideal_tests.py @@ -1,26 +1,24 @@ # Import pyMBE and other libraries import pyMBE -from lib import analysis -import os -import tempfile +import sys import subprocess import numpy as np import pandas as pd # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path = pmb.get_resource(f"samples/branched_polyampholyte.py") -data_path = pmb.get_resource(f"samples/data_polyampholyte_cph.csv") +script_path = pmb.get_resource("samples/branched_polyampholyte.py") +data_path = pmb.get_resource("samples/data_polyampholyte_cph.csv") -print(f"*** Constant pH (cpH) implementation tests ***\n") -print(f"*** Test that our implementation of the cpH method reproduces the Henderson Hasselbalch equation for an ideal polyampholyte ***\n") +print("*** Constant pH (cpH) implementation tests ***\n") +print("*** Test that our implementation of the cpH method reproduces the Henderson Hasselbalch equation for an ideal polyampholyte ***\n") -run_command = ["python3", script_path, "--test"] +run_command = [sys.executable, script_path, "--test"] subprocess.check_output(run_command) data = pd.read_csv(data_path) # Check if charges agree np.testing.assert_allclose(data["Z_sim"], data["Z_HH"], rtol=0.15, atol=0.2) -print(f"*** Test passed ***\n") +print("*** Test passed ***\n") diff --git a/testsuite/create_molecule_position_test.py b/testsuite/create_molecule_position_test.py index 933063f..900a5d9 100644 --- a/testsuite/create_molecule_position_test.py +++ b/testsuite/create_molecule_position_test.py @@ -1,12 +1,11 @@ import numpy as np import espressomd -from espressomd import interactions # Create an instance of pyMBE library import pyMBE pmb = pyMBE.pymbe_library(SEED=42) -print(f"***create_molecule with input position list unit test ***") -print(f"*** Unit test: Check that the positions of the central bead of the first residue in the generated molecules are equal to the input positions***") +print("***create_molecule with input position list unit test ***") +print("*** Unit test: Check that the positions of the central bead of the first residue in the generated molecules are equal to the input positions***") # Simulation parameters pmb.set_reduced_units(unit_length=0.4*pmb.units.nm, verbose=False) @@ -33,10 +32,10 @@ ) bond_type = 'harmonic' -generic_bond_lenght=0.4 * pmb.units.nm +generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -harmonic_bond = {'r_0' : generic_bond_lenght, +harmonic_bond = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant, } @@ -82,10 +81,10 @@ np.testing.assert_almost_equal(pos_list, central_bead_pos) -print(f"*** Unit test passed ***\n") +print("*** Unit test passed ***\n") -print(f"*** Unit test: Check that center_molecule_in_simulation_box works correctly for cubic boxes***") +print("*** Unit test: Check that center_molecule_in_simulation_box works correctly for cubic boxes***") molecule_id = pmb.df.loc[pmb.df['name']==molecule_name].molecule_id.values[0] pmb.center_molecule_in_simulation_box(molecule_id=molecule_id, espresso_system=espresso_system) @@ -94,10 +93,10 @@ np.testing.assert_almost_equal(center_of_mass, center_of_mass_ref) -print(f"*** Unit test passed ***\n") +print("*** Unit test passed ***\n") -print(f"*** Unit test: Check that center_molecule_in_simulation_box works correctly for non-cubic boxes***") +print("*** Unit test: Check that center_molecule_in_simulation_box works correctly for non-cubic boxes***") espresso_system.change_volume_and_rescale_particles(d_new=3*L.to('reduced_length').magnitude, dir="z") molecule_id = pmb.df.loc[pmb.df['name']==molecule_name].molecule_id.values[2] @@ -107,4 +106,4 @@ np.testing.assert_almost_equal(center_of_mass, center_of_mass_ref) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") diff --git a/testsuite/gcmc_tests.py b/testsuite/gcmc_tests.py index 0fa65ca..1878780 100644 --- a/testsuite/gcmc_tests.py +++ b/testsuite/gcmc_tests.py @@ -1,24 +1,22 @@ # Import pyMBE and other libraries -import pyMBE -from lib import analysis -import os +import sys import tempfile import subprocess +import pyMBE +from lib import analysis import numpy as np -import pandas as pd -import argparse # Template of the test -def gcmc_test(mode): +def gcmc_test(script_path, mode): if mode == "ideal": - print(f"*** Running test for GCMC of salt solution (ideal). ***") + print("*** Running test for GCMC of salt solution (ideal). ***") elif mode == "interacting": - print(f"*** Running test for GCMC of salt solution (interacting). ***") + print("*** Running test for GCMC of salt solution (interacting). ***") with tempfile.TemporaryDirectory() as time_series_path: for c_salt_res in salt_concentrations: print(f"c_salt_res = {c_salt_res}") - run_command=["python3", script_path, "--c_salt_res", str(c_salt_res), "--output", time_series_path, "--mode", mode, "--no_verbose"] + run_command=[sys.executable, script_path, "--c_salt_res", str(c_salt_res), "--output", time_series_path, "--mode", mode, "--no_verbose"] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series @@ -28,19 +26,19 @@ def gcmc_test(mode): test_concentration=np.sort(data["csalt","value"].to_numpy(dtype=float)) ref_concentration=np.sort(data["mean","c_salt"].to_numpy()) np.testing.assert_allclose(test_concentration, ref_concentration, rtol=rtol, atol=atol) - print(f"*** Test was successful ***") + print("*** Test was successful ***") # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path=pmb.get_resource(f"samples/salt_solution_gcmc.py") +script_path=pmb.get_resource("samples/salt_solution_gcmc.py") salt_concentrations=[0.0001, 0.001, 0.01, 0.1] rtol=0.05 # relative tolerance atol=0.0 # absolute tolerance # Ideal test -gcmc_test("ideal") +gcmc_test(script_path, "ideal") # Interacting test -gcmc_test("interacting") +gcmc_test(script_path, "interacting") diff --git a/testsuite/generate_perpendicular_vectors_test.py b/testsuite/generate_perpendicular_vectors_test.py index 2906672..f837a23 100644 --- a/testsuite/generate_perpendicular_vectors_test.py +++ b/testsuite/generate_perpendicular_vectors_test.py @@ -3,6 +3,7 @@ from itertools import combinations pmb = pyMBE.pymbe_library(SEED=42) + def check_if_different_perpendicular_vectors_are_generated(vector,magnitude,n=50): """ Checks if pmb.generate_trial_perpendicular_vector generates perpendicular vectors to `vector`. @@ -33,31 +34,31 @@ def check_if_different_perpendicular_vectors_are_generated(vector,magnitude,n=50 desired = magnitude, decimal = 5, verbose = True) - return -print(f"*** generate_trial_perpendicular_vector unit tests ***") -print(f"*** Unit test: Check that the function creates perpendicular vectors to an arbitrary vector of the same magnitude ***") + +print("*** generate_trial_perpendicular_vector unit tests ***") +print("*** Unit test: Check that the function creates perpendicular vectors to an arbitrary vector of the same magnitude ***") vector = pmb.generate_random_points_in_a_sphere(center=[0,0,0], radius=1, n_samples=1, on_surface=True)[0] check_if_different_perpendicular_vectors_are_generated(vector=vector, magnitude=1) -print(f"*** Unit test passed ***") -print(f"*** Unit test: Check that the function creates perpendicular vectors to a vector with an arbitrary origin ***") +print("*** Unit test passed ***") +print("*** Unit test: Check that the function creates perpendicular vectors to a vector with an arbitrary origin ***") vector = pmb.generate_random_points_in_a_sphere(center=[1,2,3], radius=1, n_samples=1, on_surface=True)[0] check_if_different_perpendicular_vectors_are_generated(vector=vector, magnitude=1) -print(f"*** Unit test passed ***") -print(f"*** Unit test: Check that the function creates perpendicular vectors with a different magnitude ***") +print("*** Unit test passed ***") +print("*** Unit test: Check that the function creates perpendicular vectors with a different magnitude ***") vector = pmb.generate_random_points_in_a_sphere(center=[1,2,3], radius=2, n_samples=1, on_surface=True)[0] check_if_different_perpendicular_vectors_are_generated(vector=vector, magnitude=3) -print(f"*** Unit test passed ***") -print(f"*** All unit tests passed ***") +print("*** Unit test passed ***") +print("*** All unit tests passed ***") diff --git a/testsuite/globular_protein_tests.py b/testsuite/globular_protein_tests.py index 7383ce6..b953c62 100644 --- a/testsuite/globular_protein_tests.py +++ b/testsuite/globular_protein_tests.py @@ -1,6 +1,7 @@ # Import pyMBE and other libraries import pyMBE from lib import analysis +import sys import tempfile import subprocess import numpy as np @@ -22,17 +23,13 @@ def run_protein_test(script_path, test_pH_values, protein_pdb, rtol, atol,mode=" print(f"Running tests for {protein_pdb}") with tempfile.TemporaryDirectory() as time_series_path: - for pH in test_pH_values: print(f"pH = {pH}") - + run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH), + "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf", + "--mode", "test", "--no_verbose", "--output", time_series_path] if protein_pdb == '1f6s': - run_command=["python3", script_path, "--pdb", protein_pdb, "--pH", str(pH), "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf", "--metal_ion_name","Ca", "--metal_ion_charge", "2","--mode", "test", "--no_verbose", "--output", time_series_path] - - else: - run_command=["python3", script_path, "--pdb", protein_pdb, "--pH", str(pH), "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf", "--mode", "test", "--no_verbose", "--output", time_series_path] - - + run_command+=["--metal_ion_name","Ca", "--metal_ion_charge", "2"] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series @@ -42,7 +39,7 @@ def run_protein_test(script_path, test_pH_values, protein_pdb, rtol, atol,mode=" if mode == "test": # Get reference test data - ref_data=pd.read_csv(data_path+f"/{protein_pdb}.csv", header=[0, 1]) + ref_data=pd.read_csv(f"{data_path}/{protein_pdb}.csv", header=[0, 1]) # Check charge test_charge=np.sort(data["mean","charge"].to_numpy()) ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) @@ -58,7 +55,7 @@ def run_protein_test(script_path, test_pH_values, protein_pdb, rtol, atol,mode=" # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path=pmb.get_resource(f"samples/Beyer2024/globular_protein.py") +script_path=pmb.get_resource("samples/Beyer2024/globular_protein.py") test_pH_values=[2,5,7] rtol=0.1 # relative tolerance atol=0.5 # absolute tolerance diff --git a/testsuite/grxmc_ideal_tests.py b/testsuite/grxmc_ideal_tests.py index 83564cc..8265d8b 100644 --- a/testsuite/grxmc_ideal_tests.py +++ b/testsuite/grxmc_ideal_tests.py @@ -1,21 +1,19 @@ # Import pyMBE and other libraries import pyMBE -from lib import analysis -import os -import tempfile +import sys import subprocess import numpy as np import pandas as pd # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path = pmb.get_resource(f"samples/peptide_mixture_grxmc_ideal.py") -data_path = pmb.get_resource(f"samples/data_peptide_grxmc.csv") +script_path = pmb.get_resource("samples/peptide_mixture_grxmc_ideal.py") +data_path = pmb.get_resource("samples/data_peptide_grxmc.csv") -print(f"*** Grand reaction (G-RxMC) implementation tests ***\n") -print(f"*** Test that our implementation of the original G-RxMC method reproduces the Henderson-Hasselbalch equation corrected with the Donnan potential (HH+Don) for an ideal mixture of peptides ***") +print("*** Grand reaction (G-RxMC) implementation tests ***\n") +print("*** Test that our implementation of the original G-RxMC method reproduces the Henderson-Hasselbalch equation corrected with the Donnan potential (HH+Don) for an ideal mixture of peptides ***") -run_command = ["python3", script_path, "--mode", "standard", "--test"] +run_command = [sys.executable, script_path, "--mode", "standard", "--test"] subprocess.check_output(run_command) data = pd.read_csv(data_path) @@ -24,12 +22,12 @@ # Check if partition coefficients agree np.testing.assert_allclose(data["xi_sim"], data["xi_HH_Donnan"], rtol=0.1, atol=0.1) -print(f"*** Test passed ***\n") +print("*** Test passed ***\n") -print(f"*** Test that our implementation of the G-RxMC method with unified ion types reproduces HH+Don for an ideal mixture of peptides ***") +print("*** Test that our implementation of the G-RxMC method with unified ion types reproduces HH+Don for an ideal mixture of peptides ***") -run_command = ["python3", script_path, "--mode", "unified", "--test"] +run_command = [sys.executable, script_path, "--mode", "unified", "--test"] subprocess.check_output(run_command) data = pd.read_csv(data_path) @@ -38,4 +36,4 @@ # Check if partition coefficients agree np.testing.assert_allclose(data["xi_sim"], data["xi_HH_Donnan"], rtol=0.1, atol=0.1) -print(f"*** Test passed ***\n") +print("*** Test passed ***\n") diff --git a/testsuite/henderson_hasselbalch_tests.py b/testsuite/henderson_hasselbalch_tests.py index e0b2731..de36f6b 100644 --- a/testsuite/henderson_hasselbalch_tests.py +++ b/testsuite/henderson_hasselbalch_tests.py @@ -1,9 +1,8 @@ import numpy as np import pyMBE -from itertools import combinations pmb = pyMBE.pymbe_library(SEED=42) -print(f"*** Running HH tests ***\n") +print("*** Running HH tests ***\n") # Peptide parameters sequence1 = 5 * "D" + 8 * "H" @@ -27,7 +26,7 @@ model = model) -print(f"*** Check that Henderson-Hasselbalch equation works correctly ***") +print("*** Check that Henderson-Hasselbalch equation works correctly ***") # Calculate charge according to Henderson-Hasselbalch equation pH_range = np.linspace(2, 12, num=200) @@ -43,14 +42,14 @@ """ data_path = pmb.get_resource(path="testsuite/henderson_hasselbalch_tests_data") -ref_data_HH = np.loadtxt(data_path+f"/HH.csv", delimiter=",") +ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",") np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:]) -print(f"*** Test passed ***\n") +print("*** Test passed ***\n") -print(f"*** Check that Henderson-Hasselbalch equation + Donnan works correctly ***") +print("*** Check that Henderson-Hasselbalch equation + Donnan works correctly ***") HH_Donnan_dict = pmb.calculate_HH_Donnan( c_macro = {"peptide_1": pep1_concentration, @@ -64,14 +63,14 @@ np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_2"]).reshape(1,-1), delimiter=",") """ -ref_data_HH_Donnan = np.loadtxt(data_path+f"/HH_Donnan.csv", delimiter=",") +ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",") np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:]) np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:]) -print(f"*** Test passed ***\n") +print("*** Test passed ***\n") -print(f"*** Check that HH and HH_Don are consistent ***") +print("*** Check that HH and HH_Don are consistent ***") Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", pH_list = HH_Donnan_dict["pH_system_list"]) @@ -82,4 +81,4 @@ np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"]) -print(f"*** Test passed***") +print("*** Test passed***") diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index a268745..42aaa2b 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -7,7 +7,7 @@ pmb = pyMBE.pymbe_library(SEED=42) print("*** LJ unit tests ***") -print(f"*** Unit test: check that all LJ input parameters in define_particle are correctly stored in pmb.df***") +print("*** Unit test: check that all LJ input parameters in define_particle are correctly stored in pmb.df***") input_parameters={"name":"A", "sigma":1*pmb.units.nm, @@ -23,7 +23,7 @@ desired=input_parameters[parameter_key], verbose=True) print("*** Unit test passed ***") -print(f"*** Unit test: check that `offset` defaults to 0***") +print("*** Unit test: check that `offset` defaults to 0***") # Clean pmb.df pmb.setup_df() # Define dummy particle @@ -35,7 +35,7 @@ verbose=True) print("*** Unit test passed ***") -print(f"*** Unit test: check that `cutoff` defaults to `2**(1./6.) reduced_length` ***") +print("*** Unit test: check that `cutoff` defaults to `2**(1./6.) reduced_length` ***") # Clean pmb.df pmb.setup_df() # Define dummy particle @@ -47,31 +47,31 @@ verbose=True) print("*** Unit test passed ***") -print(f"*** Unit test: check that define_particle raises a ValueError if sigma is provided with the wrong dimensionality ***") +print("*** Unit test: check that define_particle raises a ValueError if sigma is provided with the wrong dimensionality ***") input_parameters={"name":"B", "sigma":1*pmb.units.ns } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_particle raises a ValueError if offset is provided with the wrong dimensionality ***") +print("*** Unit test: check that define_particle raises a ValueError if offset is provided with the wrong dimensionality ***") input_parameters={"name":"B", "offset":1*pmb.units.ns } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_particle raises a ValueError if cutoff is provided with the wrong dimensionality ***") +print("*** Unit test: check that define_particle raises a ValueError if cutoff is provided with the wrong dimensionality ***") input_parameters={"name":"B", "cutoff":1*pmb.units.ns } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: check that define_particle raises a ValueError if epsilon is provided with the wrong dimensionality ***") +print("*** Unit test: check that define_particle raises a ValueError if epsilon is provided with the wrong dimensionality ***") input_parameters={"name":"B", "epsilon":1*pmb.units.ns } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: test that setup_lj_interactions sets up inert particles correctly ***") +print("*** Unit test: test that setup_lj_interactions sets up inert particles correctly ***") # Clean pmb.df pmb.setup_df() @@ -116,8 +116,8 @@ desired=A_input_parameters["epsilon"].to("reduced_energy").magnitude, verbose=True) -print(f"*** Unit test passed ***") -print(f"*** Unit test: test that setup_lj_interactions sets up acid/base particles correctly ***") +print("*** Unit test passed ***") +print("*** Unit test: test that setup_lj_interactions sets up acid/base particles correctly ***") # Check B-B, B-BH, BH-BH setup @@ -133,8 +133,8 @@ desired=B_input_parameters["epsilon"].to("reduced_energy").magnitude, verbose=True) -print(f"*** Unit test passed ***") -print(f"*** Unit test: test that setup_lj_interactions sets up LJ interaction between different particles correctly ***") +print("*** Unit test passed ***") +print("*** Unit test: test that setup_lj_interactions sets up LJ interaction between different particles correctly ***") # Calculate the reference parameters @@ -157,9 +157,9 @@ np.testing.assert_equal(actual=setup_lj_parameters["epsilon"], desired=ref_lj_parameters["epsilon"].to("reduced_energy").magnitude, verbose=True) -print(f"*** Unit test passed ***") +print("*** Unit test passed ***") -print(f"*** Unit test: test that setup_lj_interactions does not set up any LJ interactions for particles with sigma = 0 ***") +print("*** Unit test: test that setup_lj_interactions does not set up any LJ interactions for particles with sigma = 0 ***") lj_labels=pmb.filter_df("LennardJones")["name"].values # Check that no interaction between particle C and any other particle has been set up @@ -169,5 +169,5 @@ if "C" in label: raise Exception("*** Unit Test failed ***") -print(f"*** Unit test passed ***") -print(f"*** All unit tests passed ***") +print("*** Unit test passed ***") +print("*** All unit tests passed ***") diff --git a/testsuite/parameter_test.py b/testsuite/parameter_test.py index fbbc388..af78f9c 100644 --- a/testsuite/parameter_test.py +++ b/testsuite/parameter_test.py @@ -3,8 +3,7 @@ pmb = pyMBE.pymbe_library(SEED=42) - -print(f"*** Check that the different pKa sets are correctly formatted ***") +print("*** Check that the different pKa sets are correctly formatted ***") pymbe_root = pathlib.Path(pyMBE.__file__).parent pka_root = pymbe_root / "parameters" / "pka_sets" @@ -16,4 +15,4 @@ pmb.load_pka_set(path_to_pka, verbose=False) -print(f"*** Test passed ***") +print("*** Test passed ***") diff --git a/testsuite/peptide_tests.py b/testsuite/peptide_tests.py index aa18d1f..261499e 100644 --- a/testsuite/peptide_tests.py +++ b/testsuite/peptide_tests.py @@ -1,9 +1,9 @@ # Import pyMBE and other libraries -import pyMBE -from lib import analysis -import os +import sys import tempfile import subprocess +import pyMBE +from lib import analysis import numpy as np import pandas as pd @@ -25,7 +25,7 @@ def run_peptide_test(script_path,test_pH_values,sequence,rtol,atol,mode="test"): with tempfile.TemporaryDirectory() as time_series_path: for pH in test_pH_values: print(f"pH = {pH}") - run_command=["python3", script_path, "--sequence", sequence, "--pH", str(pH), "--mode", "test", "--no_verbose", "--output", time_series_path] + run_command=[sys.executable, script_path, "--sequence", sequence, "--pH", str(pH), "--mode", "test", "--no_verbose", "--output", time_series_path] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series @@ -52,7 +52,7 @@ def run_peptide_test(script_path,test_pH_values,sequence,rtol,atol,mode="test"): # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") +script_path=pmb.get_resource("samples/Beyer2024/peptide.py") test_pH_values=[3,7,11] rtol=0.1 # relative tolerance atol=0.5 # absolute tolerance diff --git a/testsuite/read-write-df_test.py b/testsuite/read-write-df_test.py index d1056d0..a8f2b88 100644 --- a/testsuite/read-write-df_test.py +++ b/testsuite/read-write-df_test.py @@ -1,8 +1,8 @@ -import espressomd import re -from ast import literal_eval -from espressomd import interactions -from pandas.testing import assert_frame_equal +import ast +import tempfile +import espressomd +import pandas as pd # Create an instance of pyMBE library import pyMBE @@ -57,10 +57,10 @@ bond_type = 'harmonic' -generic_bond_lenght=0.4 * pmb.units.nm +generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -harmonic_bond = {'r_0' : generic_bond_lenght, +harmonic_bond = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant, } @@ -84,25 +84,25 @@ # Setup potential energy pmb.setup_lj_interactions (espresso_system=espresso_system) -pmb.pd.options.display.max_colwidth = 10 +pd.options.display.max_colwidth = 10 # Copy the pmb.df into a new DF for the unit test stored_df = pmb.df.copy() -# Write the pymbe DF to a csv file -df_filename = 'df-example_molecule.csv' -pmb.write_pmb_df (filename = df_filename) +with tempfile.TemporaryDirectory() as tmp_directory: + # Write the pymbe DF to a csv file + df_filename = f'{tmp_directory}/df-example_molecule.csv' + pmb.write_pmb_df (filename = df_filename) -# Read the same pyMBE df from a csv a load it in pyMBE -read_df = pmb.read_pmb_df(filename = df_filename) + # Read the same pyMBE df from a csv a load it in pyMBE + read_df = pmb.read_pmb_df(filename = df_filename) # Preprocess data for the Unit Test # The espresso bond object must be converted to a dict in order to compare them using assert_frame_equal -stored_df['bond_object'] = stored_df['bond_object'].apply(lambda x: literal_eval(re.subn('HarmonicBond', '', str(x))[0]) if pmb.pd.notnull(x) else x) -read_df['bond_object'] = read_df['bond_object'].apply(lambda x: literal_eval(re.subn('HarmonicBond', '', str(x))[0]) if pmb.pd.notnull(x) else x) +stored_df['bond_object'] = stored_df['bond_object'].apply(lambda x: ast.literal_eval(re.subn('HarmonicBond', '', str(x))[0]) if pd.notnull(x) else x) +read_df['bond_object'] = read_df['bond_object'].apply(lambda x: ast.literal_eval(re.subn('HarmonicBond', '', str(x))[0]) if pd.notnull(x) else x) -print(f"*** Unit test: check that the dataframe stored by pyMBE to file is the same as the one read from the file (same values and variable types) ***") +print("*** Unit test: check that the dataframe stored by pyMBE to file is the same as the one read from the file (same values and variable types) ***") -assert_frame_equal (stored_df, read_df, check_exact= True) -print (f"*** Unit test passed***") - +pd.testing.assert_frame_equal (stored_df, read_df, check_exact= True) +print("*** Unit test passed***") diff --git a/testsuite/seed_test.py b/testsuite/seed_test.py index 86a5623..9f6f0d8 100644 --- a/testsuite/seed_test.py +++ b/testsuite/seed_test.py @@ -1,7 +1,5 @@ import numpy as np import espressomd -import warnings -from espressomd import interactions import pyMBE espresso_system = espressomd.System(box_l = [100]*3) @@ -28,10 +26,10 @@ def build_peptide_in_espresso(SEED): pmb.define_peptide(name=peptide_name, sequence=sequence, model=model) # Bond parameters - generic_bond_lenght=0.4 * pmb.units.nm + generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') - HARMONIC_parameters = {'r_0' : generic_bond_lenght, + HARMONIC_parameters = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant} pmb.define_default_bond(bond_type = 'harmonic', @@ -53,10 +51,10 @@ def build_peptide_in_espresso(SEED): return np.asarray(positions) -print(f"*** Check that the using the same SEED results in the same initial particle positions***") +print("*** Check that the using the same SEED results in the same initial particle positions***") positions1 = build_peptide_in_espresso(42) positions2 = build_peptide_in_espresso(42) np.testing.assert_almost_equal(positions1, positions2) -print(f"*** Test passed ***") +print("*** Test passed ***") diff --git a/testsuite/set_particle_acidity_test.py b/testsuite/set_particle_acidity_test.py index 744ddf8..9c5623f 100644 --- a/testsuite/set_particle_acidity_test.py +++ b/testsuite/set_particle_acidity_test.py @@ -33,7 +33,7 @@ def check_acid_base_setup(input_parameters,acidity_setup): print("*** Particle acidity unit tests ***") -print(f"*** Unit test: check that all acid/base input parameters in define_particle for an inert particle are correctly stored in pmb.df***") +print("*** Unit test: check that all acid/base input parameters in define_particle for an inert particle are correctly stored in pmb.df***") # Clean pmb.df pmb.setup_df() input_parameters={"name":"I", @@ -48,8 +48,8 @@ def check_acid_base_setup(input_parameters,acidity_setup): check_acid_base_setup(input_parameters=input_parameters, acidity_setup=acidity_setup) -print(f"*** Unit test passed ***") -print(f"*** Unit test: check that all acid/base input parameters in define_particle for an acid are correctly stored in pmb.df***") +print("*** Unit test passed ***") +print("*** Unit test: check that all acid/base input parameters in define_particle for an acid are correctly stored in pmb.df***") # Clean pmb.df pmb.setup_df() input_parameters={"name":"A", @@ -62,8 +62,8 @@ def check_acid_base_setup(input_parameters,acidity_setup): check_acid_base_setup(input_parameters=input_parameters, acidity_setup=acidity_setup) -print(f"*** Unit test passed ***") -print(f"*** Unit test: check that all acid/base input parameters in define_particle for a base are correctly stored in pmb.df***") +print("*** Unit test passed ***") +print("*** Unit test: check that all acid/base input parameters in define_particle for a base are correctly stored in pmb.df***") # Clean pmb.df pmb.setup_df() input_parameters={"name":"B", @@ -76,8 +76,8 @@ def check_acid_base_setup(input_parameters,acidity_setup): check_acid_base_setup(input_parameters=input_parameters, acidity_setup=acidity_setup) -print(f"*** Unit test passed ***") -print(f"*** Unit test: check that set_particle_acidity raises a ValueError if pKa is not provided and pKa is acidic or basic ***") +print("*** Unit test passed ***") +print("*** Unit test: check that set_particle_acidity raises a ValueError if pKa is not provided and pKa is acidic or basic ***") input_parametersA={"name":"A", "acidity": "acidic" } @@ -85,10 +85,10 @@ def check_acid_base_setup(input_parameters,acidity_setup): "acidity": "basic"} np.testing.assert_raises(ValueError, pmb.set_particle_acidity,**input_parametersA) np.testing.assert_raises(ValueError, pmb.set_particle_acidity, **input_parametersB) -print(f"*** Unit test passed ***") -print(f"*** Unit test: check that set_particle_acidity raises a ValueError if a non-supported acidity is provided ***") +print("*** Unit test passed ***") +print("*** Unit test: check that set_particle_acidity raises a ValueError if a non-supported acidity is provided ***") input_parametersA={"name":"A", "acidity": "random" } np.testing.assert_raises(ValueError, pmb.set_particle_acidity,**input_parametersA) -print(f"*** Unit test passed ***") -print(f"*** All unit tests passed ***") +print("*** Unit test passed ***") +print("*** All unit tests passed ***") diff --git a/testsuite/weak_polyelectrolyte_dialysis_test.py b/testsuite/weak_polyelectrolyte_dialysis_test.py index d74cb72..c770cc3 100644 --- a/testsuite/weak_polyelectrolyte_dialysis_test.py +++ b/testsuite/weak_polyelectrolyte_dialysis_test.py @@ -1,7 +1,7 @@ # Import pyMBE and other libraries import pyMBE from lib import analysis -import os +import sys import tempfile import subprocess import numpy as np @@ -10,7 +10,7 @@ # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(SEED=42) -script_path=pmb.get_resource(f"samples/Beyer2024/weak_polyelectrolyte_dialysis.py") +script_path=pmb.get_resource("samples/Beyer2024/weak_polyelectrolyte_dialysis.py") test_pH_values=[3,5,7,9] c_salt_res=0.01 c_mon_sys=0.435 @@ -18,11 +18,11 @@ rtol=0.1 # relative tolerance atol=0.05 # absolute tolerance -print(f"*** Running test for weak polyelectrolyte dialysis with G-RxMC (interacting). ***") +print("*** Running test for weak polyelectrolyte dialysis with G-RxMC (interacting). ***") with tempfile.TemporaryDirectory() as time_series_path: for pH in test_pH_values: print(f"pH = {pH}") - run_command=["python3", script_path, "--c_salt_res", str(c_salt_res), "--c_mon_sys", str(c_mon_sys), "--pKa_value", str(pKa_value), "--pH_res", str(pH), "--mode", "test", "--output", time_series_path, "--no_verbose"] + run_command=[sys.executable, script_path, "--c_salt_res", str(c_salt_res), "--c_mon_sys", str(c_mon_sys), "--pKa_value", str(pKa_value), "--pH_res", str(pH), "--mode", "test", "--output", time_series_path, "--no_verbose"] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series @@ -30,10 +30,9 @@ data_path=pmb.get_resource(path="testsuite/weak_polyelectrolyte_dialysis_test_data") # Get reference test data -ref_data=pd.read_csv(data_path+f"/data.csv", header=[0, 1]) +ref_data=pd.read_csv(f"{data_path}/data.csv", header=[0, 1]) # Check charge test_charge=np.sort(data["mean","alpha"].to_numpy()) ref_charge=np.sort(ref_data["mean","alpha"].to_numpy()) np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) -print(f"*** Test was successful ***") - +print("*** Test was successful ***") diff --git a/tutorials/pyMBE_tutorial.ipynb b/tutorials/pyMBE_tutorial.ipynb index 5731a0b..b12d7b3 100644 --- a/tutorials/pyMBE_tutorial.ipynb +++ b/tutorials/pyMBE_tutorial.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -87,23 +87,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Current set of reduced units:\n", - "0.355 nanometer = 1 reduced_length\n", - "4.11640356238e-21 joule = 1 reduced_energy\n", - "Temperature: 298.15 kelvin\n", - "1.602176634e-19 coulomb = 1 reduced_charge\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "pmb.print_reduced_units()" ] @@ -117,23 +103,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Current set of reduced units:\n", - "0.49999999999999994 nanometer = 1 reduced_length\n", - "4.11640356238e-21 joule = 1 reduced_energy\n", - "Temperature: 298.15 kelvin\n", - "8.01088317e-19 coulomb = 1 reduced_charge\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "pmb.set_reduced_units(unit_length = 0.5*pmb.units.nm, \n", " unit_charge = 5*pmb.units.e)\n", @@ -156,17 +128,9 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The side of the simulation box is 7.5 nanometer = 14.999999999999998 reduced_length\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "Box_L = 7.5*pmb.units.nm\n", "\n", @@ -191,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -211,86 +175,9 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namepmb_typeaciditysigmacutoffoffsetepsilonstate_one
labeles_typecharge
0Naparticleinert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
\n", - "
" - ], - "text/plain": [ - " name pmb_type acidity sigma cutoff \\\n", - " \n", - "0 Na particle inert 0.35 nanometer 1.122462048309373 reduced_length \n", - "\n", - " offset epsilon state_one \n", - " label es_type charge \n", - "0 0 reduced_length 1 reduced_energy Na 0 0 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "pmb.filter_df(pmb_type = 'particle')" ] @@ -304,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -323,416 +210,9 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namepmb_typeparticle_idaciditysigmacutoffoffsetepsilonstate_one
labeles_typecharge
0Naparticle0inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
1Naparticle1inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
2Naparticle2inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
3Naparticle3inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
4Naparticle4inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
5Naparticle5inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
6Naparticle6inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
7Naparticle7inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
8Naparticle8inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
9Naparticle9inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
10Naparticle10inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
11Naparticle11inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
12Naparticle12inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
13Naparticle13inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
14Naparticle14inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
15Naparticle15inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
16Naparticle16inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
17Naparticle17inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
18Naparticle18inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
19Naparticle19inert0.35 nanometer1.122462048309373 reduced_length0 reduced_length1 reduced_energyNa00
\n", - "
" - ], - "text/plain": [ - " name pmb_type particle_id acidity sigma \\\n", - " \n", - "0 Na particle 0 inert 0.35 nanometer \n", - "1 Na particle 1 inert 0.35 nanometer \n", - "2 Na particle 2 inert 0.35 nanometer \n", - "3 Na particle 3 inert 0.35 nanometer \n", - "4 Na particle 4 inert 0.35 nanometer \n", - "5 Na particle 5 inert 0.35 nanometer \n", - "6 Na particle 6 inert 0.35 nanometer \n", - "7 Na particle 7 inert 0.35 nanometer \n", - "8 Na particle 8 inert 0.35 nanometer \n", - "9 Na particle 9 inert 0.35 nanometer \n", - "10 Na particle 10 inert 0.35 nanometer \n", - "11 Na particle 11 inert 0.35 nanometer \n", - "12 Na particle 12 inert 0.35 nanometer \n", - "13 Na particle 13 inert 0.35 nanometer \n", - "14 Na particle 14 inert 0.35 nanometer \n", - "15 Na particle 15 inert 0.35 nanometer \n", - "16 Na particle 16 inert 0.35 nanometer \n", - "17 Na particle 17 inert 0.35 nanometer \n", - "18 Na particle 18 inert 0.35 nanometer \n", - "19 Na particle 19 inert 0.35 nanometer \n", - "\n", - " cutoff offset epsilon \\\n", - " \n", - "0 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "1 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "2 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "3 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "4 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "5 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "6 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "7 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "8 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "9 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "10 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "11 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "12 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "13 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "14 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "15 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "16 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "17 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "18 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "19 1.122462048309373 reduced_length 0 reduced_length 1 reduced_energy \n", - "\n", - " state_one \n", - " label es_type charge \n", - "0 Na 0 0 \n", - "1 Na 0 0 \n", - "2 Na 0 0 \n", - "3 Na 0 0 \n", - "4 Na 0 0 \n", - "5 Na 0 0 \n", - "6 Na 0 0 \n", - "7 Na 0 0 \n", - "8 Na 0 0 \n", - "9 Na 0 0 \n", - "10 Na 0 0 \n", - "11 Na 0 0 \n", - "12 Na 0 0 \n", - "13 Na 0 0 \n", - "14 Na 0 0 \n", - "15 Na 0 0 \n", - "16 Na 0 0 \n", - "17 Na 0 0 \n", - "18 Na 0 0 \n", - "19 Na 0 0 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "pmb.filter_df(pmb_type = 'particle')" ] @@ -746,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -766,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -783,45 +263,9 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "pmb.filter_df(pmb_type = 'particle')" ] @@ -851,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -884,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -904,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1629,10 +1073,20 @@ "metadata": {}, "outputs": [], "source": [ + "pmb.load_interaction_parameters (filename = pmb.get_resource('parameters/peptides/Lunkad2021.json'))\n", "pmb.load_interaction_parameters (filename = pmb.get_resource('parameters/peptides/Lunkad2021.json'))\n", "pmb.add_bonds_to_espresso (espresso_system = espresso_system)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pmb.df)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1748,7 +1202,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/tutorials/solution_tutorial.ipynb b/tutorials/solution_tutorial.ipynb index f40aeaf..07095be 100644 --- a/tutorials/solution_tutorial.ipynb +++ b/tutorials/solution_tutorial.ipynb @@ -28,24 +28,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Current set of reduced units:\n", - "1.5 nanometer = 1 reduced_length\n", - "4.11640356238e-21 joule = 1 reduced_energy\n", - "Temperature: 298.15 kelvin\n", - "8.01088317e-19 coulomb = 1 reduced_charge\n", - "\n", - "The side of the simulation box is 7.5 nanometer = 4.999999999999999 reduced_length\n" - ] - } - ], + "outputs": [], "source": [ "# Import pyMBE and ESPResSo\n", "import pyMBE\n", @@ -92,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -259,7 +244,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/visualization/create_vmd_script.py b/visualization/create_vmd_script.py index 5173046..44e8fc3 100644 --- a/visualization/create_vmd_script.py +++ b/visualization/create_vmd_script.py @@ -41,7 +41,7 @@ def organize_numbered_files (input_file_names): for frame in input_file_names: num = re.findall(r'\d+', frame) - if ( num == [] ): + if num == []: raise ValueError("\nfilename " + frame + " not compatible with the expected filename format filename_{number}.{extension}\n") files_ordered [int(num[0])]=frame @@ -87,7 +87,7 @@ def read_vtf_files (input_file_names): frame_bond ={} N_frame=0 - if args.numbered_files == True: + if args.numbered_files: files_ordered = organize_numbered_files (input_file_names) else: @@ -114,12 +114,12 @@ def read_vtf_files (input_file_names): header=line_clean[0] - if (header == "unitcell"): + if header == "unitcell": box = line_clean[1:4] unitcell[N_frame] = box - elif (header == "atom"): + elif header == "atom": id_atom = line_clean[1] r_atom = line_clean[3] @@ -132,12 +132,12 @@ def read_vtf_files (input_file_names): type_list.append(type_atom) - elif (header == "bond"): + elif header == "bond": bond_info = line_clean[1] frame_bond_list.append(bond_info) - elif (header.isnumeric()): + elif header.isnumeric(): id_part = line_clean[0] coord_part = line_clean[1:] @@ -199,7 +199,7 @@ def count_particles_types_from_vtf_frames (data_vtf_frames): radius_types[atom_type]=atom[1] for type in type_list: - if (n_type_frame[type] > n_type[type]): + if n_type_frame[type] > n_type[type]: n_type[type] = n_type_frame[type] n_part = sum(n_type.values()) @@ -236,7 +236,7 @@ def read_xyz_files (input_file_names, box = 20.0): coord_list = [] comment=[] - if args.numbered_files == True: + if args.numbered_files: files_ordered = organize_numbered_files (input_file_names) else: files_ordered = not_numbered_files (input_file_names) @@ -252,11 +252,11 @@ def read_xyz_files (input_file_names, box = 20.0): for line in file: - if (len(line) < 10): + if len(line) < 10: frame_list.append(int(line)) - elif (line[0] != "#"): + elif line[0] != "#": type_atom.append(line.split()[0]) x_coord = line.split()[1] @@ -333,7 +333,7 @@ def count_particles_types_from_xyz_file (data_xyz) : for type in type_list: - if (n_type_frame[type] > n_type[type]): + if n_type_frame[type] > n_type[type]: n_type[type] = n_type_frame[type] ### maximum of each type @@ -383,7 +383,7 @@ def reorganize_xyz_data (data_xyz,particles_count) : frame_line = nframe*frame + line - if (type == data_xyz['type_atom'] [frame_line] ): + if type == data_xyz['type_atom'][frame_line]: atom_type = data_xyz['type_atom'] [frame_line] n_type_frame [atom_type] += 1 @@ -507,7 +507,7 @@ def create_output_vtf_trajectory_for_vmd (output_trajectory,data_frames,new_part n_part +=1 output_vmd.write (f'atom {str(hidden_id)}') - output_vmd.write (f' radius 1') + output_vmd.write (' radius 1') output_vmd.write (f' name {hidden_name}') output_vmd.write (f' typ {hidden_type} \n') @@ -515,7 +515,7 @@ def create_output_vtf_trajectory_for_vmd (output_trajectory,data_frames,new_part for frame in range(data_frames['N_frame']): - output_vmd.write(f'\ntimestep indexed\n') + output_vmd.write('\ntimestep indexed\n') frame_atom_list = frame_atom[frame] frame_coord_list = frame_coord[frame] @@ -524,10 +524,10 @@ def create_output_vtf_trajectory_for_vmd (output_trajectory,data_frames,new_part for id_at in range(n_part ): - if (id_at < len(frame_coord_list)): + if id_at < len(frame_coord_list): at_list=frame_atom_list[id_at] - at_type=(at_list[-1]) + at_type=at_list[-1] at_coord=frame_coord_list[id_at][1] poss_id=frame_ids_type[str(at_type)].copy() new_id=poss_id[0] @@ -595,16 +595,16 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ with open(output_visualization, "w") as output_file: - output_file.write(f'mol delete top\n') - output_file.write(f'display depthcue off\n') + output_file.write('mol delete top\n') + output_file.write('display depthcue off\n') output_file.write(f'mol load vtf {os.path.basename(output_trajectory)}\n') - output_file.write(f'mol delrep 0 top\n') - output_file.write(f'display resetview\n') - output_file.write(f'mol representation CPK 1.000000 0.000000\n') - output_file.write(f'mol selelection all\n') - output_file.write(f'animate goto 0\n') - output_file.write(f'color Display Background white\n') - output_file.write(f'axes location off\n') + output_file.write('mol delrep 0 top\n') + output_file.write('display resetview\n') + output_file.write('mol representation CPK 1.000000 0.000000\n') + output_file.write('mol selelection all\n') + output_file.write('animate goto 0\n') + output_file.write('color Display Background white\n') + output_file.write('axes location off\n') output_file.write(f'pbc box_draw -color gray -width {str(width_box_line)}\n') color=0 @@ -623,9 +623,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 1 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 1 \n') + output_file.write('mol addrep top\n\n') elif typ in basic_charged_groups: @@ -634,9 +634,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 0 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 0 \n') + output_file.write('mol addrep top\n\n') elif typ == 'CA': @@ -645,9 +645,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 4 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 4 \n') + output_file.write('mol addrep top\n\n') elif typ == 'Na': @@ -656,9 +656,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 10 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 10 \n') + output_file.write('mol addrep top\n\n') elif typ == 'Cl': @@ -667,9 +667,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 9 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 9 \n') + output_file.write('mol addrep top\n\n') else: @@ -678,9 +678,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') - output_file.write(f'mol color ColorID 7 \n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol material Opaque\n') + output_file.write('mol color ColorID 7 \n') + output_file.write('mol addrep top\n\n') else: @@ -691,9 +691,9 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ output_file.write(f'set {var} [atomselect top "name {typ} "]\n') output_file.write(f'${var} set radius 1.0\n') output_file.write(f'mol selection name {typ}\n') - output_file.write(f'mol material Opaque\n') + output_file.write('mol material Opaque\n') output_file.write(f'mol color ColorID {str(color)}\n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol addrep top\n\n') color=color+1 @@ -701,11 +701,11 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ var="typ"+hidden_type output_file.write(f'set {var} [atomselect top "name {hidden_type} "]\n') output_file.write(f'${var} set radius 1\n') - output_file.write(f'mol representation CPK 1.000000 16.000000\n') + output_file.write('mol representation CPK 1.000000 16.000000\n') output_file.write(f'mol selection name {hidden_type}\n') - output_file.write(f'mol material Goodsell\n') + output_file.write('mol material Goodsell\n') output_file.write(f'mol color ColorID {str(8)}\n') - output_file.write(f'mol addrep top\n\n') + output_file.write('mol addrep top\n\n') return 0 @@ -714,7 +714,7 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ parser = argparse.ArgumentParser(description='Script to create merge the trajectory frames to a single vtf trajectory for VMD and produce a tcl script for visualization of this trajectory.') parser.add_argument('file_names', nargs ='*',help='Expected the input files') parser.add_argument('--print_input_file_names', help='print the input file names on the screen while loading', default=False, action='store_true') - parser.add_argument('--separator', type = str, help = 'the character that separates the base name and the frame number', default='\d+' ) + parser.add_argument('--separator', type = str, help = 'the character that separates the base name and the frame number', default=r'\d+' ) parser.add_argument('--numbered_files', help = 'work with numbered files', default=False, action= 'store_true') parser.add_argument('--color_code', type = str, help = 'for protein color_code add protein as an argument', default='' ) @@ -729,7 +729,7 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ input_file_names += glob(file_name) extension = os.path.splitext((file_name))[1] extension_list.append(extension) - if count_files > 1 and args.numbered_files == False: + if count_files > 1 and not args.numbered_files: raise ValueError ('\nSeems like multiple frames have been provided. To work with multiple trajectory files add --numbered_files\n') check_files_extension_consistency (extension_list) @@ -738,8 +738,8 @@ def create_output_visualization_tcl (output_trajectory,output_visualization, typ extension = os.path.splitext((file_name))[1] basename = re.split(f'{args.separator}',(os.path.splitext((file_name))[0].split('/'))[-1])[0] - if print_input_file_names == True: - print (f'Format: {extension} Loading: {file}') + if print_input_file_names: + print (f'Format: {extension} Loading: {args.file_names}') output_trajectory = os.path.join(os.path.dirname(file_name), f'{basename}-frames.vtf' ) output_visualization = os.path.join(os.path.dirname(file_name), 'visualization.tcl') diff --git a/visualization/vmd-traj.py b/visualization/vmd-traj.py index ed714b7..57869fe 100644 --- a/visualization/vmd-traj.py +++ b/visualization/vmd-traj.py @@ -1,7 +1,5 @@ -from os import listdir -from os.path import isfile, join -import numpy as np -import math as mt +import os +import math import re frame_dir='./frames' # Path to the directory of the frame trajectories @@ -13,16 +11,16 @@ # Create the list of files -files = [name for name in listdir(frame_dir) if isfile(join(frame_dir, name))] +files = [name for name in os.listdir(frame_dir) + if os.path.isfile(os.path.join(frame_dir, name))] -files=sorted(files) +files=sorted(files) -# Order the trajectory files +# Order the trajectory files files_ordered={} for frame in files: - num=re.findall(r'\d+', frame) files_ordered[int(num[0])]=frame @@ -43,50 +41,37 @@ maximum_index=max(files_ordered.keys()) for frame in range(minimum_index,maximum_index+1): # 'file' is a builtin type, 'frame' is a less-ambiguous variable name. - + file_name=files_ordered[frame] path=frame_dir+"/"+file_name print("Loading frame " + path) with open(path) as f: - frame_atom_list=[] frame_bond_list=[] frame_coord_list=[] for line in f: - - line_clean=line.split() - + line_clean=line.split() if line_clean: # Non-empty line - header=line_clean[0] - - if (header == "unitcell"): - + if header == "unitcell": box=line_clean[1:4] unitcell[N_frame]=box - - elif (header == "atom"): - + elif header == "atom": id_atom=line_clean[1] r_atom=line_clean[3] name_atom=line_clean[5] type_atom=line_clean[7] data_atom=[id_atom,r_atom,name_atom,type_atom] frame_atom_list.append(data_atom) - if type_atom not in type_list: - type_list.append(type_atom) - - elif (header == "bond"): - + elif header == "bond": bond_info=line_clean[1] frame_bond_list.append(bond_info) - elif (header.isnumeric()): - + elif header.isnumeric(): id_part=line_clean[0] coord_part=line_clean[1:] frame_coord_list.append([id_part,coord_part]) @@ -111,29 +96,20 @@ for frame in range(N_frame): - for typ in type_list: - N_type_frame[typ]=0 - - for at in atom[frame]: + for at in atom[frame]: at_type=at[-1] N_type_frame[at_type]+=1 - if at_type not in radi_types.keys(): - radi_types[at_type]=at[1] - if at_type not in name_types.keys(): - name_types[at_type]=at[2] - - for typ in type_list: - - if (N_type_frame[typ] > N_type[typ]): - N_type[typ]=N_type_frame[typ] + for typ in type_list: + if N_type_frame[typ] > N_type[typ]: + N_type[typ]=N_type_frame[typ] N_part=sum(N_type.values()) @@ -143,11 +119,9 @@ ids_type={} for typ in type_list: - ids_list=[] for id_at in range(id0,N_type[typ]+id0): - ids_list.append(id_at) id0=id_at+1 @@ -173,14 +147,10 @@ f_vmd.write("\n") for id_at in range(N_part): f_vmd.write("atom " + str(id_at)) - for id_list in ids_type.values(): - if id_at in id_list: - key_list = list(ids_type.keys()) val_list = list(ids_type.values()) - position = val_list.index(id_list) typ=key_list[position] @@ -202,61 +172,47 @@ f_vmd.write("timestep indexed") f_vmd.write("\n") - if (unitcell[frame] != unit_cell_frame): - + if unitcell[frame] != unit_cell_frame: f_vmd.write("unitcell") - for cell_coor in unit_cell_frame: f_vmd.write(" "+str(cell_coor)) - f_vmd.write("\n") frame_atom_list=atom[frame] frame_coord_list=coord[frame] frame_ids_type=ids_type.copy() - - for id_at in range(N_part): - - if (id_at < len(frame_coord_list)): + for id_at in range(N_part): + if id_at < len(frame_coord_list): at_list=frame_atom_list[id_at] at_type=int(at_list[-1]) at_coord=frame_coord_list[id_at][1] poss_id=frame_ids_type[str(at_type)].copy() new_id=poss_id[0] f_vmd.write(str(new_id)+" ") - + n_cor=0 for cor in at_coord: - if folded: - size_box=float(unit_cell_frame[n_cor]) cor=float(cor) - pbc_cor=cor-mt.floor(cor/size_box)*size_box + pbc_cor=cor-math.floor(cor/size_box)*size_box f_vmd.write(str(pbc_cor)+" ") n_cor+=1 else: - f_vmd.write(str(cor)+" ") - - f_vmd.write("\n") + + f_vmd.write("\n") poss_id.remove(new_id) frame_ids_type[str(at_type)]=poss_id - - for id_list in frame_ids_type.values(): + for id_list in frame_ids_type.values(): if id_list: - for id_at in id_list: - f_vmd.write(str(id_at)+" ") - for cor in reservoir_pos: - f_vmd.write(str(cor)+" ") - f_vmd.write("\n") # Write the hidding particle @@ -284,7 +240,6 @@ color=0 for typ in type_list: - var="typ"+typ f_visual.write("set " + var + ' [atomselect top "name ' + typ + ' "]') f_visual.write("\n") @@ -302,8 +257,6 @@ f_visual.write("\n \n") color=color+1 - - var="typ"+hidden_type f_visual.write("set " + var + ' [atomselect top "name ' + hidden_type + ' "]') f_visual.write("\n")