Improve proteins script (#39)

* added new functions * added test script * changes on new functions * clean up of the test * added write_pmb_df to all the samples * added read-write test to makefile * minor changes * clean up and minor changes * remove pmb.df as an argument in write_pmb_df() function * added assert_frame_equal * added dtypes on the df setup * minor changes * minor change and cleanup * clean up * clean up * adapted define_particle * adapted setup_df with new columns * minor change * adapted new cols with units * minor change * uncomment a line * changed dtype of es_type * make CI aware when the test fails, tidy up test and Makefile * changes on parameters_of_the_potential format * deleted unnecessary lines * adapted lj_test * changes on add_bond_in_df * remove commented lines on add_nomd_in_df * fix import of handy_tools * rename activate_motion.. function * adapted standarize_data * added standarize data * added test script for globular protein * added different mode * adapted globular_protein script to new format * changes on protein test * minor changes * added to read radius from protein vtf file * remove sigma dict * changes to add amino charge in time_series output * added to reproduce globular protein plot * changes on data path * remove run_test_protein * minor changes * minor change * fix calculation of charge per AAA * fix ion size, fix setup of the simulation, revert particle size to 0.35 nm for protein beads * changes on radius to calculate mean radius of aminos * changes on protein radius assignment * new cg model * minor typo fix * minor typo fix on protein pdb * added new 1f6s cg * added test data * deleted folders * added globular protein test to makefile * make cpH CI test less overzealous * avoid defining acidity in define_particles_AA, remove calls to os.system --------- Co-authored-by: Pablo M. Blanco <[email protected]> Co-authored-by: blancoapa <[email protected]>
pyMBE-dev · Apr 24, 2024 · 2b6868b · 2b6868b
1 parent a5edd7c
commit 2b6868b
Show file tree

Hide file tree

Showing 15 changed files with 693 additions and 546 deletions.
diff --git a/Makefile b/Makefile
@@ -19,10 +19,10 @@ tests:
 	python3 testsuite/grxmc_ideal_tests.py
 	python3 testsuite/peptide_tests.py
 	python3 testsuite/weak_polyelectrolyte_dialysis_test.py
+	python3 testsuite/globular_protein_tests.py
 sample:
 	python3 sample_scripts/peptide_simulation_example.py
 
-
 visual:
 	python3 handy_scripts/vmd-traj.py
 	vmd -e visualization.tcl

diff --git a/lib/create_cg_from_pdb.py b/lib/create_cg_from_pdb.py
@@ -308,14 +308,17 @@ def create_sidechain_beads  (pdb_df) :
         x_coord_r = pdb_df['x_pos'] 
         y_coord_r = pdb_df['y_pos'] 
         z_coord_r = pdb_df['z_pos'] 
+
+        pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform (np.mean).round(4)
 
         atom_number_pdb_r = pd.Series(pdb_df.residue_number)
         resname_r = pd.Series(pdb_df.residue_name)
         resname_one_letter_r=  pd.Series (pdb_df.resname_one_letter)
         resid_r = pd.Series(pdb_df.residue_number)
 
-        radius_r = pd.Series(pdb_df['radius_r'])
+        radius_r = pd.Series(pdb_df['radius_mean'])
         chain_r = pd.Series(pdb_df.chain_id)
+
         residues_bead = {'atom_numbers_r': atom_number_pdb_r,\
              'x_coord_r':x_coord_r,'y_coord_r':y_coord_r,'z_coord_r':z_coord_r, \
                 'resname_r':resname_r,'resname_one_letter_r':resname_one_letter_r , 'radius_r':radius_r, 'chain_r': chain_r, 'resid_r': resid_r}
@@ -507,7 +510,7 @@ def create_output_coarse_grain_model_as_vtf_file  (coarse_grain,beads_bond, iden
     parser = argparse.ArgumentParser(description='Creates a coarse-grained model from a protein structure given in PDB format')    
     parser.add_argument('--filename', dest='filename', help='\nPath to the PDB file\n')
     parser.add_argument('--download_pdb', dest='pdb_code', help='Downloads the corresponding PDB from RCSB and coarse-grains it') 
-    parser.add_argument('--model', dest='model', default='2bead', type=str , help='\nCoarse-grained model to be used\n')
+    parser.add_argument('--model', dest='model', default='2beadAA', type=str , help='\nCoarse-grained model to be used\n')
     parser.add_argument('--chain_id', type=str , help='\nSpecific chaid_id to coarse-grain\n') 
     parser.add_argument('--verbose', dest='verbose', action='store_true')
     parser.add_argument('--no-verbose', dest='verbose', action='store_false')

diff --git a/maintainer/standarize_data.py b/maintainer/standarize_data.py
@@ -11,7 +11,10 @@
 supported_filenames=["data_landsgesell.csv",
                      "Glu-HisMSDE.csv",
                      "Lys-AspMSDE.csv",
-                     "histatin5_SoftMatter.txt"]
+                     "histatin5_SoftMatter.txt",
+                     "1beb-10mM-torres.dat",
+                     "1f6s-10mM-torres.dat"]
+
 
 parser = argparse.ArgumentParser(description='Script to standarize the data from various authors')
 parser.add_argument('--src_filename', 
@@ -27,7 +30,9 @@
 output_filenames={"data_landsgesell.csv": "Landsgesell2020a.csv",
                   "Lys-AspMSDE.csv": "Lunkad2021a.csv",
                   "Glu-HisMSDE.csv": "Lunkad2021b.csv",
-                  "histatin5_SoftMatter.txt": "Blanco2020a.csv"}
+                  "histatin5_SoftMatter.txt": "Blanco2020a.csv",
+                  "1beb-10mM-torres.dat": "Torres2017.csv",
+                  "1f6s-10mM-torres.dat": "Torres2022.csv"}
 
 # Sanity checks
 if filename not in supported_filenames:
@@ -37,27 +42,51 @@
 ref_path=pmb.get_resource(f"testsuite/data/src/{filename}")
 Refs_lunkad=["Glu-HisMSDE.csv","Lys-AspMSDE.csv"]
 Ref_blanco=["histatin5_SoftMatter.txt"]
+
+Ref_torres = ["1f6s-10mM-torres.dat","1beb-10mM-torres.dat" ]
 Ref_landsgesell=["data_landsgesell.csv"]
 
+
 if filename in Refs_lunkad:
     data=pd.read_csv(ref_path)
     Z_ref = 5*-1*data['aaa']+5*data['aab']
     # Error propagation calculation
     # 1/4 factor added to correct for bug in the original calculation of the error reported by the authors       
     Z_ref_err = 5/4*np.sqrt((data['eaa'])**2+(data['eab'])**2)   
+    pH_range = np.linspace(2, 12, num=21)
 
 elif filename in Ref_blanco:
     data=np.loadtxt(ref_path, delimiter=",")
     Z_ref=data[:,1]         
     Z_ref_err=data[:,2]
 
+    pH_range = np.linspace(2, 12, num=21)
+
+elif filename in Ref_torres:
+
+    Z_ref = []
+    Z_ref_err = []
+    pH_range = []
+
+    with open (ref_path,'r') as file:
+        for line in file: 
+            line_split = line.split ()
+            pH = float (line_split[0])
+            pH_range.append(pH)
+            Z_ref.append (float(line_split[1]))
+            Z_ref_err.append(float(line_split[2]))
+
 elif filename in Ref_landsgesell:
     data = pd.read_csv(ref_path, sep="\t", index_col=False)
-
 else:
     raise RuntimeError()
 
 
+# Store the data
+data=pd.DataFrame({"pH": pH_range,
+                  "charge": Z_ref,
+                  "charge_error": Z_ref_err})
+
 if filename in Refs_lunkad+Ref_blanco:
     pH_range = np.linspace(2, 12, num=21)
 
@@ -66,6 +95,7 @@
                       "charge": Z_ref,
                       "charge_error": Z_ref_err})
 
+
 data_path=pmb.get_resource(f"testsuite/data")
 data.to_csv(f"{data_path}/{output_filenames[filename]}", 
             index=False)