diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 88a73df..2538224 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,6 +1,6 @@
 name: ci
 
-on: push
+on: pull_request
 
 jobs:
   build:
diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py
old mode 100644
new mode 100755
index 7b70459..b242dcb
--- a/src/arctic3d/cli_resclust.py
+++ b/src/arctic3d/cli_resclust.py
@@ -23,6 +23,8 @@
     `linkage` : the linkage strategy.
 
     `criterion` : the criterion to extract the clusters.
+
+    `output` : the path where to output clusters data.
 """
 import argparse
 import sys
@@ -36,6 +38,7 @@
     get_clustering_dict,
 )
 from arctic3d.modules.input import Input
+from arctic3d.modules.output import create_output_folder
 
 
 argument_parser = argparse.ArgumentParser()
@@ -88,6 +91,13 @@
     "--chain", help="Segment ID to be considered", required=False
 )
 
+argument_parser.add_argument(
+    "--output",
+    help="Path to the generated output dictionary",
+    type=str,
+    required=False,
+)
+
 
 def load_args(arguments):
     """
@@ -128,7 +138,7 @@ def maincli():
     cli(argument_parser, main)
 
 
-def main(input_arg, residue_list, chain, threshold, linkage, criterion):
+def main(input_arg, residue_list, chain, threshold, linkage, criterion, output):
     """Main function."""
     log.setLevel("INFO")
 
@@ -192,14 +202,32 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion):
         )
 
         cl_dict = get_clustering_dict(clusters, unique_sorted_resids)
-        for el in cl_dict.keys():
-            log.info(
-                f"cluster {el}, residues"
-                f" {' '.join([str(res) for res in cl_dict[el]])}"
-            )
+
     else:
         log.info("Only one residue, no clustering performed.")
-        log.info(f"cluster 1, residues {unique_sorted_resids[0]}")
+        # fake cluster dict with only one entry
+        cl_dict = {1: unique_sorted_resids}
+
+    # log data
+    for el in cl_dict.keys():
+        log.info(
+            f"cluster {el}, residues"
+            f" {' '.join([str(res) for res in cl_dict[el]])}"
+        )
+
+    # check if data must be flushed to output file
+    if output:
+        # initiate output directory
+        output_basepath = create_output_folder(output, uniprot_id='resclust')
+        # write txt file
+        log.info(f'writing clusters data in "{output_basepath}/clustered_residues.out"')
+        with open(f'{output_basepath}/clustered_residues.out', 'w') as filout:
+            for el in cl_dict.keys():
+                filout.write(
+                    f"cluster {el} -> "
+                    f"{' '.join([str(res) for res in cl_dict[el]])}"
+                    "\n"
+                )
 
 
 if __name__ == "__main__":
diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py
old mode 100644
new mode 100755
index 849c968..7686f8c
--- a/src/arctic3d/modules/clustering.py
+++ b/src/arctic3d/modules/clustering.py
@@ -127,10 +127,8 @@ def get_clustering_dict(clusters, ligands):
     cl_dict = {}
     # loop over clusters
     for cl in range(len(clusters)):
-        if clusters[cl] not in cl_dict.keys():
-            cl_dict[clusters[cl]] = [ligands[cl]]
-        else:
-            cl_dict[clusters[cl]].append(ligands[cl])
+        cluster_members = cl_dict.setdefault(clusters[cl], [])
+        cluster_members.append(ligands[cl])
     log.info(f"Cluster dictionary {cl_dict}")
     return cl_dict
 
diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py
old mode 100644
new mode 100755
index 06f19d7..b1762a7
--- a/tests/test_cli_resclust.py
+++ b/tests/test_cli_resclust.py
@@ -2,6 +2,9 @@
 
 import pytest
 
+import os
+import shutil
+
 from arctic3d.cli_resclust import main
 
 from . import golden_data
@@ -22,6 +25,7 @@ def test_resclust_cli(example_pdbpath):
         7.0,
         "average",
         "distance",
+        None,
     )
 
 
@@ -35,6 +39,7 @@ def test_wrong_residue_list(example_pdbpath):
             9.0,
             "average",
             "distance",
+            None,
         )
     assert e.type == SystemExit
     assert e.value.code == 1
@@ -49,4 +54,21 @@ def test_resclust_maxclust(example_pdbpath):
         2,
         "average",
         "maxclust",
+        None,
     )
+
+
+def test_resclust_genoutput(example_pdbpath):
+    main(
+        example_pdbpath,
+        "100,101,102,133,134,135",
+        None,
+        2,
+        "average",
+        "maxclust",
+        "resclustout",
+    )
+    assert os.path.exists("resclustout")
+    assert os.path.exists("resclustout/clustered_residues.out")
+    shutil.rmtree("resclustout")
+