forked from choderalab/nano-drugbank
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount_sulfinyls.py
36 lines (27 loc) · 1.07 KB
/
count_sulfinyls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from __future__ import print_function
__author__ = 'isikm'
import pandas as pd
from openeye import oechem, oedepict
df_drugbank_smiles=pd.DataFrame.from_csv('df_drugbank_smiles.csv', encoding='utf-8')
# Count number of sulfinyl groups in each row using SMARTS matching
for i, row in enumerate(df_drugbank_smiles.iterrows()):
smiles = df_drugbank_smiles.loc[i,"smiles"]
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, str(smiles))
# create a substructure search object
queried_substructure="[SX3](=O)(*)*"
ss = oechem.OESubSearch(queried_substructure)
oechem.OEPrepareSearch(mol, ss)
# loop over matches to count
matched_ss_list=[]
count=0
for index, match in enumerate(ss.Match(mol)):
if ss.SingleMatch(mol) == True:
matched_ss_list.append((index, match))
count = len(matched_ss_list)
# print(count, match)
# add number of matches to dataframe
df_drugbank_smiles.loc[i,"sulfinyl"] = count
#write to csv
df_drugbank_smiles.to_csv("df_drugbank_smiles.csv", encoding='utf-8')
print("Done.")