diff --git a/build/broad_sanger/build_drugs.sh b/build/broad_sanger/build_drugs.sh index fa4f4c12..a58ba4d9 100644 --- a/build/broad_sanger/build_drugs.sh +++ b/build/broad_sanger/build_drugs.sh @@ -1,3 +1,3 @@ /opt/venv/bin/python 03a-nci60Drugs.py Rscript 03-createDrugFile.R CTRPv2,GDSC,gCSI,PRISM,CCLE,FIMM -/opt/venv/bin/python build_drug_desc.py --drugtable /tmp/broad_sanger_drugs.tsv --desctable /tmp/broad_sanger_drug_desciptors.tsv.gz +/opt/venv/bin/python build_drug_desc.py --drugtable /tmp/broad_sanger_drugs.tsv --desctable /tmp/broad_sanger_drug_descriptors.tsv.gz diff --git a/build/utils/build_drug_desc.py b/build/utils/build_drug_desc.py index e53570de..9b6e91b4 100644 --- a/build/utils/build_drug_desc.py +++ b/build/utils/build_drug_desc.py @@ -40,12 +40,20 @@ def smiles_to_mordred(smiles): ''' print('Computing mordred descriptors for '+str(len(smiles))+' SMILES') + mols = [Chem.MolFromSmiles(s) for s in smiles] + smols = [] + ssmil = [] + for i in range(len(mols)): + m = mols[i] + if m is not None: + smols.append(m) + ssmil.append(smiles[i]) calc = Calculator(descriptors, ignore_3D=True) - dd = calc.pandas( mols, nmols=None, quiet=False, ipynb=False ) + dd = calc.pandas( smols, nmols=None, quiet=False, ipynb=False ) values = dd.columns - dd['smile'] = smiles + dd['smile'] = ssmil ##reformat here longtab = pd.melt(dd,id_vars='smile',value_vars=values) longtab = longtab.rename({'variable':'structural_descriptor','value':'descriptor_value'},axis=1)