Skip to content

Commit

Permalink
Improve proteins script (#39)
Browse files Browse the repository at this point in the history
* added new functions

* added test script

* changes on new functions

* clean up of the test

* added write_pmb_df to all the samples

* added read-write test to makefile

* minor changes

* clean up and minor changes

* remove pmb.df as an argument in write_pmb_df() function

* added assert_frame_equal

* added dtypes on the df setup

* minor changes

* minor change and cleanup

* clean up

* clean up

* adapted define_particle

* adapted setup_df with new columns

* minor change

* adapted new cols with units

* minor change

* uncomment a line

* changed dtype of es_type

* make CI aware when the test fails, tidy up test and Makefile

* changes on parameters_of_the_potential format

* deleted unnecessary lines

* adapted lj_test

* changes on add_bond_in_df

* remove commented lines on add_nomd_in_df

* fix import of handy_tools

* rename activate_motion.. function

* adapted standarize_data

* added standarize data

* added test script for globular protein

* added different mode

* adapted globular_protein script to new format

* changes on protein test

* minor changes

* added to read radius from protein vtf file

* remove sigma dict

* changes to add amino charge in time_series output

* added to reproduce globular protein plot

* changes on data path

* remove run_test_protein

* minor changes

* minor change

* fix calculation of charge per AAA

* fix ion size, fix setup of the simulation, revert particle size to 0.35 nm for protein beads

* changes on radius to calculate mean radius of aminos

* changes on protein radius assignment

* new cg model

* minor typo fix

* minor typo fix on protein pdb

* added new 1f6s cg

* added test data

* deleted folders

* added globular protein test to makefile

* make cpH CI test less overzealous

* avoid defining acidity in define_particles_AA, remove calls to os.system

---------

Co-authored-by: Pablo M. Blanco <[email protected]>
Co-authored-by: blancoapa <[email protected]>
  • Loading branch information
3 people authored Apr 24, 2024
1 parent a5edd7c commit 2b6868b
Show file tree
Hide file tree
Showing 15 changed files with 693 additions and 546 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ tests:
python3 testsuite/grxmc_ideal_tests.py
python3 testsuite/peptide_tests.py
python3 testsuite/weak_polyelectrolyte_dialysis_test.py
python3 testsuite/globular_protein_tests.py
sample:
python3 sample_scripts/peptide_simulation_example.py


visual:
python3 handy_scripts/vmd-traj.py
vmd -e visualization.tcl
Expand Down
7 changes: 5 additions & 2 deletions lib/create_cg_from_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,14 +308,17 @@ def create_sidechain_beads (pdb_df) :
x_coord_r = pdb_df['x_pos']
y_coord_r = pdb_df['y_pos']
z_coord_r = pdb_df['z_pos']

pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform (np.mean).round(4)

atom_number_pdb_r = pd.Series(pdb_df.residue_number)
resname_r = pd.Series(pdb_df.residue_name)
resname_one_letter_r= pd.Series (pdb_df.resname_one_letter)
resid_r = pd.Series(pdb_df.residue_number)

radius_r = pd.Series(pdb_df['radius_r'])
radius_r = pd.Series(pdb_df['radius_mean'])
chain_r = pd.Series(pdb_df.chain_id)

residues_bead = {'atom_numbers_r': atom_number_pdb_r,\
'x_coord_r':x_coord_r,'y_coord_r':y_coord_r,'z_coord_r':z_coord_r, \
'resname_r':resname_r,'resname_one_letter_r':resname_one_letter_r , 'radius_r':radius_r, 'chain_r': chain_r, 'resid_r': resid_r}
Expand Down Expand Up @@ -507,7 +510,7 @@ def create_output_coarse_grain_model_as_vtf_file (coarse_grain,beads_bond, iden
parser = argparse.ArgumentParser(description='Creates a coarse-grained model from a protein structure given in PDB format')
parser.add_argument('--filename', dest='filename', help='\nPath to the PDB file\n')
parser.add_argument('--download_pdb', dest='pdb_code', help='Downloads the corresponding PDB from RCSB and coarse-grains it')
parser.add_argument('--model', dest='model', default='2bead', type=str , help='\nCoarse-grained model to be used\n')
parser.add_argument('--model', dest='model', default='2beadAA', type=str , help='\nCoarse-grained model to be used\n')
parser.add_argument('--chain_id', type=str , help='\nSpecific chaid_id to coarse-grain\n')
parser.add_argument('--verbose', dest='verbose', action='store_true')
parser.add_argument('--no-verbose', dest='verbose', action='store_false')
Expand Down
36 changes: 33 additions & 3 deletions maintainer/standarize_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
supported_filenames=["data_landsgesell.csv",
"Glu-HisMSDE.csv",
"Lys-AspMSDE.csv",
"histatin5_SoftMatter.txt"]
"histatin5_SoftMatter.txt",
"1beb-10mM-torres.dat",
"1f6s-10mM-torres.dat"]


parser = argparse.ArgumentParser(description='Script to standarize the data from various authors')
parser.add_argument('--src_filename',
Expand All @@ -27,7 +30,9 @@
output_filenames={"data_landsgesell.csv": "Landsgesell2020a.csv",
"Lys-AspMSDE.csv": "Lunkad2021a.csv",
"Glu-HisMSDE.csv": "Lunkad2021b.csv",
"histatin5_SoftMatter.txt": "Blanco2020a.csv"}
"histatin5_SoftMatter.txt": "Blanco2020a.csv",
"1beb-10mM-torres.dat": "Torres2017.csv",
"1f6s-10mM-torres.dat": "Torres2022.csv"}

# Sanity checks
if filename not in supported_filenames:
Expand All @@ -37,27 +42,51 @@
ref_path=pmb.get_resource(f"testsuite/data/src/{filename}")
Refs_lunkad=["Glu-HisMSDE.csv","Lys-AspMSDE.csv"]
Ref_blanco=["histatin5_SoftMatter.txt"]

Ref_torres = ["1f6s-10mM-torres.dat","1beb-10mM-torres.dat" ]
Ref_landsgesell=["data_landsgesell.csv"]


if filename in Refs_lunkad:
data=pd.read_csv(ref_path)
Z_ref = 5*-1*data['aaa']+5*data['aab']
# Error propagation calculation
# 1/4 factor added to correct for bug in the original calculation of the error reported by the authors
Z_ref_err = 5/4*np.sqrt((data['eaa'])**2+(data['eab'])**2)
pH_range = np.linspace(2, 12, num=21)

elif filename in Ref_blanco:
data=np.loadtxt(ref_path, delimiter=",")
Z_ref=data[:,1]
Z_ref_err=data[:,2]

pH_range = np.linspace(2, 12, num=21)

elif filename in Ref_torres:

Z_ref = []
Z_ref_err = []
pH_range = []

with open (ref_path,'r') as file:
for line in file:
line_split = line.split ()
pH = float (line_split[0])
pH_range.append(pH)
Z_ref.append (float(line_split[1]))
Z_ref_err.append(float(line_split[2]))

elif filename in Ref_landsgesell:
data = pd.read_csv(ref_path, sep="\t", index_col=False)

else:
raise RuntimeError()


# Store the data
data=pd.DataFrame({"pH": pH_range,
"charge": Z_ref,
"charge_error": Z_ref_err})

if filename in Refs_lunkad+Ref_blanco:
pH_range = np.linspace(2, 12, num=21)

Expand All @@ -66,6 +95,7 @@
"charge": Z_ref,
"charge_error": Z_ref_err})


data_path=pmb.get_resource(f"testsuite/data")
data.to_csv(f"{data_path}/{output_filenames[filename]}",
index=False)
Loading

0 comments on commit 2b6868b

Please sign in to comment.