Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify resclust outputs #330

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
36 changes: 29 additions & 7 deletions src/arctic3d/cli_resclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
`linkage` : the linkage strategy.

`criterion` : the criterion to extract the clusters.

`output` : the path where to output clusters data.
"""
import argparse
import os
import sys

import MDAnalysis as mda
Expand All @@ -36,6 +39,7 @@
get_clustering_dict,
)
from arctic3d.modules.input import Input
from arctic3d.modules.output import create_output_folder


argument_parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -88,6 +92,13 @@
"--chain", help="Segment ID to be considered", required=False
)

argument_parser.add_argument(
"--output",
help="Path to the generated output dictionary",
type=str,
required=False,
)


def load_args(arguments):
"""
Expand Down Expand Up @@ -128,7 +139,7 @@ def maincli():
cli(argument_parser, main)


def main(input_arg, residue_list, chain, threshold, linkage, criterion):
def main(input_arg, residue_list, chain, threshold, linkage, criterion, output):
"""Main function."""
log.setLevel("INFO")

Expand Down Expand Up @@ -187,14 +198,25 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion):
)

cl_dict = get_clustering_dict(clusters, unique_sorted_resids)
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

else:
log.info("Only one residue, no clustering performed.")
log.info(f"cluster 1, residues {unique_sorted_resids[0]}")
# fake cluster dict with only one entry
cl_dict = {1: unique_sorted_resids}

# log data
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

# check if data must be flushed to output file
if output:
output_basepath = create_output_folder(output, uniprot_id='resclust')
log.info(f'writing clusters data in "{output_basepath}/Clusters.json"')
with open(f'{output_basepath}/Clusters.json', 'w') as filout:
filout.write(str(cl_dict).replace("'", '"'))


if __name__ == "__main__":
Expand Down
10 changes: 6 additions & 4 deletions src/arctic3d/modules/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import time
import json

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -127,11 +128,12 @@ def get_clustering_dict(clusters, ligands):
cl_dict = {}
# loop over clusters
for cl in range(len(clusters)):
if clusters[cl] not in cl_dict.keys():
cl_dict[clusters[cl]] = [ligands[cl]]
if (strcl := str(clusters[cl])) not in cl_dict.keys():
cl_dict[strcl] = [ligands[cl]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will change the type of the keys from integer to string, which can have breaking consequences. Why not keep integers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the point of the PR: a dictionary with integer keys cannot be loaded using the json library.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the default behaviour of json.load; casting the keys back to integers requires subclassing a Decoder.

else:
cl_dict[clusters[cl]].append(ligands[cl])
log.info(f"Cluster dictionary {cl_dict}")
cl_dict[strcl].append(ligands[cl])
strdict = str(cl_dict).replace("'", '"')
log.info(f"Cluster dictionary {strdict}")
return cl_dict


Expand Down
22 changes: 22 additions & 0 deletions tests/test_cli_resclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import pytest

import os
import shutil

from arctic3d.cli_resclust import main

from . import golden_data
Expand All @@ -21,6 +24,7 @@ def test_resclust_cli(example_pdbpath):
7.0,
"average",
"distance",
None,
)


Expand All @@ -33,6 +37,7 @@ def test_wrong_residue_list(example_pdbpath):
9.0,
"average",
"distance",
None,
)
assert e.type == SystemExit
assert e.value.code == 1
Expand All @@ -46,4 +51,21 @@ def test_resclust_maxclust(example_pdbpath):
2,
"average",
"maxclust",
None,
)


def test_resclust_genoutput(example_pdbpath):
    """Check that passing an output path to `main` produces Clusters.json.

    Parameters
    ----------
    example_pdbpath : fixture
        Path to the example PDB file handed to `main` as its input argument.
    """
    output_dir = "resclustout"
    try:
        main(
            example_pdbpath,
            "100,101,102,133,134,135",
            None,
            2,
            "average",
            "maxclust",
            output_dir,
        )
        assert os.path.exists(output_dir)
        assert os.path.exists(os.path.join(output_dir, "Clusters.json"))
    finally:
        # Always clean up, even when `main` or an assertion fails, so a
        # leftover folder cannot affect later test runs. `ignore_errors`
        # keeps a missing folder from masking the original failure.
        shutil.rmtree(output_dir, ignore_errors=True)

8 changes: 4 additions & 4 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def test_get_cl_dict():
clusters_list = [1, 1, 2, 3, 3, 4, 2]
ligands_list = ["int1", "int2", "p53", "00", "int47", "antibody", "dimer"]
expected_cl_dict = {
1: ["int1", "int2"],
2: ["p53", "dimer"],
3: ["00", "int47"],
4: ["antibody"],
"1": ["int1", "int2"],
"2": ["p53", "dimer"],
"3": ["00", "int47"],
"4": ["antibody"],
}
observed_cl_dict = get_clustering_dict(clusters_list, ligands_list)
assert expected_cl_dict, observed_cl_dict
Expand Down