From 08e89e5867e60e4047b0aa6644265bdde7c8416a Mon Sep 17 00:00:00 2001 From: Victor Reys Date: Tue, 29 Aug 2023 11:23:29 +0200 Subject: [PATCH 01/11] issue #327 --- src/arctic3d/cli_resclust.py | 36 ++++++++++++++++++++++++------ src/arctic3d/modules/clustering.py | 10 +++++---- tests/test_cli_resclust.py | 22 ++++++++++++++++++ tests/test_clustering.py | 8 +++---- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py index eddfacfa..9228d9a4 100644 --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -23,8 +23,11 @@ `linkage` : the linkage strategy. `criterion` : the criterion to extract the clusters. + + `output` : the path where to output clusters data. """ import argparse +import os import sys import MDAnalysis as mda @@ -36,6 +39,7 @@ get_clustering_dict, ) from arctic3d.modules.input import Input +from arctic3d.modules.output import create_output_folder argument_parser = argparse.ArgumentParser() @@ -88,6 +92,13 @@ "--chain", help="Segment ID to be considered", required=False ) +argument_parser.add_argument( + "--output", + help="Path to the generated output dictionary", + type=str, + required=False, +) + def load_args(arguments): """ @@ -128,7 +139,7 @@ def maincli(): cli(argument_parser, main) -def main(input_arg, residue_list, chain, threshold, linkage, criterion): +def main(input_arg, residue_list, chain, threshold, linkage, criterion, output): """Main function.""" log.setLevel("INFO") @@ -187,14 +198,25 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion): ) cl_dict = get_clustering_dict(clusters, unique_sorted_resids) - for el in cl_dict.keys(): - log.info( - f"cluster {el}, residues" - f" {' '.join([str(res) for res in cl_dict[el]])}" - ) + else: log.info("Only one residue, no clustering performed.") - log.info(f"cluster 1, residues {unique_sorted_resids[0]}") + # fake cluster dict with only one entry + cl_dict = {1: unique_sorted_resids} + + # log data + for el in cl_dict.keys(): + log.info( + f"cluster {el}, residues" + f" {' '.join([str(res) for res in cl_dict[el]])}" + ) + + # check if data must be flushed to output file + if output: + output_basepath = create_output_folder(output, uniprot_id='resclust') + log.info(f'writing clusters data in "{output_basepath}/Clusters.json"') + with open(f'{output_basepath}/Clusters.json', 'w') as filout: + filout.write(str(cl_dict).replace("'", '"')) if __name__ == "__main__": diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py index 849c9687..b0225890 100644 --- a/src/arctic3d/modules/clustering.py +++ b/src/arctic3d/modules/clustering.py @@ -2,6 +2,7 @@ import logging import time +import json import matplotlib.pyplot as plt import numpy as np @@ -127,11 +128,12 @@ def get_clustering_dict(clusters, ligands): cl_dict = {} # loop over clusters for cl in range(len(clusters)): - if clusters[cl] not in cl_dict.keys(): - cl_dict[clusters[cl]] = [ligands[cl]] + if (strcl := str(clusters[cl])) not in cl_dict.keys(): + cl_dict[strcl] = [ligands[cl]] else: - cl_dict[clusters[cl]].append(ligands[cl]) - log.info(f"Cluster dictionary {cl_dict}") + cl_dict[strcl].append(ligands[cl]) + strdict = str(cl_dict).replace("'", '"') + log.info(f"Cluster dictionary {strdict}") return cl_dict diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py index 7ac322a4..35818d49 100644 --- a/tests/test_cli_resclust.py +++ b/tests/test_cli_resclust.py @@ -2,6 +2,9 @@ import pytest +import os +import shutil + from arctic3d.cli_resclust import main from . import golden_data @@ -21,6 +24,7 @@ def test_resclust_cli(example_pdbpath): 7.0, "average", "distance", + None, ) @@ -33,6 +37,7 @@ def test_wrong_residue_list(example_pdbpath): 9.0, "average", "distance", + None, ) assert e.type == SystemExit assert e.value.code == 1 @@ -46,4 +51,21 @@ def test_resclust_maxclust(example_pdbpath): 2, "average", "maxclust", + None, ) + + +def test_resclust_genoutput(example_pdbpath): + main( + example_pdbpath, + "100,101,102,133,134,135", + None, + 2, + "average", + "maxclust", + "resclustout", + ) + assert os.path.exists("resclustout") == True + assert os.path.exists("resclustout/Clusters.json") == True + shutil.rmtree("resclustout") + diff --git a/tests/test_clustering.py b/tests/test_clustering.py index f2ccb71f..fd4d3147 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -40,10 +40,10 @@ def test_get_cl_dict(): clusters_list = [1, 1, 2, 3, 3, 4, 2] ligands_list = ["int1", "int2", "p53", "00", "int47", "antibody", "dimer"] expected_cl_dict = { - 1: ["int1", "int2"], - 2: ["p53", "dimer"], - 3: ["00", "int47"], - 4: ["antibody"], + "1": ["int1", "int2"], + "2": ["p53", "dimer"], + "3": ["00", "int47"], + "4": ["antibody"], } observed_cl_dict = get_clustering_dict(clusters_list, ligands_list) assert expected_cl_dict, observed_cl_dict From 0de5dade575106690ff34ee0cdde000e2faa0b87 Mon Sep 17 00:00:00 2001 From: Victor Reys Date: Thu, 31 Aug 2023 08:28:11 +0200 Subject: [PATCH 02/11] lint and review fixes --- src/arctic3d/cli_resclust.py | 11 ++++++++++- src/arctic3d/modules/clustering.py | 1 - tests/test_cli_resclust.py | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) mode change 100644 => 100755 src/arctic3d/cli_resclust.py mode change 100644 => 100755 src/arctic3d/modules/clustering.py mode change 100644 => 100755 tests/test_cli_resclust.py diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py old mode 100644 new mode 100755 index 9228d9a4..3d0ae54c --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -27,7 +27,6 @@ `output` : the path where to output clusters data. """ import argparse -import os import sys import MDAnalysis as mda @@ -213,10 +212,20 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion, output): # check if data must be flushed to output file if output: + # initiate output directory output_basepath = create_output_folder(output, uniprot_id='resclust') + # write json file log.info(f'writing clusters data in "{output_basepath}/Clusters.json"') with open(f'{output_basepath}/Clusters.json', 'w') as filout: filout.write(str(cl_dict).replace("'", '"')) + # write txt file + log.info(f'writing clusters data in "{output_basepath}/Clusters.txt"') + with open(f'{output_basepath}/Clusters.txt', 'w') as filout: + for el in cl_dict.keys(): + filout.write( + f"cluster {el} -> " + f"{' '.join([str(res) for res in cl_dict[el]])}" + ) if __name__ == "__main__": diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py old mode 100644 new mode 100755 index b0225890..db9f377f --- a/src/arctic3d/modules/clustering.py +++ b/src/arctic3d/modules/clustering.py @@ -2,7 +2,6 @@ import logging import time -import json import matplotlib.pyplot as plt import numpy as np diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py old mode 100644 new mode 100755 index 35818d49..912e359c --- a/tests/test_cli_resclust.py +++ b/tests/test_cli_resclust.py @@ -67,5 +67,6 @@ def test_resclust_genoutput(example_pdbpath): ) assert os.path.exists("resclustout") == True assert os.path.exists("resclustout/Clusters.json") == True + assert os.path.exists("resclustout/Clusters.txt") == True shutil.rmtree("resclustout") From dd67e3334dfe9bc8afe414b042ec18b702a25877 Mon Sep 17 00:00:00 2001 From: Victor Reys Date: Thu, 31 Aug 2023 08:33:50 +0200 Subject: [PATCH 03/11] filenames & ext --- src/arctic3d/cli_resclust.py | 4 ++-- tests/test_cli_resclust.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py index 3d0ae54c..3fde60be 100755 --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -216,11 +216,11 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion, output): output_basepath = create_output_folder(output, uniprot_id='resclust') # write json file log.info(f'writing clusters data in "{output_basepath}/Clusters.json"') - with open(f'{output_basepath}/Clusters.json', 'w') as filout: + with open(f'{output_basepath}/clustered_residues.json', 'w') as filout: filout.write(str(cl_dict).replace("'", '"')) # write txt file log.info(f'writing clusters data in "{output_basepath}/Clusters.txt"') - with open(f'{output_basepath}/Clusters.txt', 'w') as filout: + with open(f'{output_basepath}/clustered_residues.out', 'w') as filout: for el in cl_dict.keys(): filout.write( f"cluster {el} -> " diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py index 912e359c..dfc25c08 100755 --- a/tests/test_cli_resclust.py +++ b/tests/test_cli_resclust.py @@ -66,7 +66,7 @@ def test_resclust_genoutput(example_pdbpath): "resclustout", ) assert os.path.exists("resclustout") == True - assert os.path.exists("resclustout/Clusters.json") == True - assert os.path.exists("resclustout/Clusters.txt") == True + assert os.path.exists("resclustout/clustered_residues.json") == True + assert os.path.exists("resclustout/clustered_residues.out") == True shutil.rmtree("resclustout") From ce3ecae894887f8aa1b7d5ab7aa73e00d918f9b7 Mon Sep 17 00:00:00 2001 From: Victor Reys Date: Thu, 31 Aug 2023 08:54:32 +0200 Subject: [PATCH 04/11] Carriage return issue in .out file --- src/arctic3d/cli_resclust.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py index 3fde60be..c343b96f 100755 --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -225,6 +225,7 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion, output): filout.write( f"cluster {el} -> " f"{' '.join([str(res) for res in cl_dict[el]])}" + "\n" ) From 1094bad11d071c81e45a872dd3634336bd9377b4 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:41:49 +0200 Subject: [PATCH 05/11] Update clustering.py removing string casting --- src/arctic3d/modules/clustering.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py index db9f377f..9596d3d8 100755 --- a/src/arctic3d/modules/clustering.py +++ b/src/arctic3d/modules/clustering.py @@ -127,12 +127,11 @@ def get_clustering_dict(clusters, ligands): cl_dict = {} # loop over clusters for cl in range(len(clusters)): - if (strcl := str(clusters[cl])) not in cl_dict.keys(): - cl_dict[strcl] = [ligands[cl]] + if clusters[cl] not in cl_dict.keys(): + cl_dict[cl] = [ligands[cl]] else: - cl_dict[strcl].append(ligands[cl]) - strdict = str(cl_dict).replace("'", '"') - log.info(f"Cluster dictionary {strdict}") + cl_dict[cl].append(ligands[cl]) + log.info(f"Cluster dictionary {cl_dict}") return cl_dict From 999cdff12608fbca1030a7feb82e634a935aa28e Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:42:44 +0200 Subject: [PATCH 06/11] Update cli_resclust.py removing json output file --- src/arctic3d/cli_resclust.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py index fb4b2c65..b242dcb6 100755 --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -219,10 +219,6 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion, output): if output: # initiate output directory output_basepath = create_output_folder(output, uniprot_id='resclust') - # write json file - log.info(f'writing clusters data in "{output_basepath}/Clusters.json"') - with open(f'{output_basepath}/clustered_residues.json', 'w') as filout: - filout.write(str(cl_dict).replace("'", '"')) # write txt file log.info(f'writing clusters data in "{output_basepath}/Clusters.txt"') with open(f'{output_basepath}/clustered_residues.out', 'w') as filout: From bce2c1a8380b0b69906cc3beea15f4f971ff281a Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:43:17 +0200 Subject: [PATCH 07/11] Update test_cli_resclust.py removing json output file test --- tests/test_cli_resclust.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py index dfc25c08..a6353a24 100755 --- a/tests/test_cli_resclust.py +++ b/tests/test_cli_resclust.py @@ -66,7 +66,6 @@ def test_resclust_genoutput(example_pdbpath): "resclustout", ) assert os.path.exists("resclustout") == True - assert os.path.exists("resclustout/clustered_residues.json") == True assert os.path.exists("resclustout/clustered_residues.out") == True shutil.rmtree("resclustout") From 2f883e6df38e0d65c3007f8dd7729f011532ead6 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:43:54 +0200 Subject: [PATCH 08/11] Update test_clustering.py removing string version on keys in test_get_cl_dict() --- tests/test_clustering.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_clustering.py b/tests/test_clustering.py index fd4d3147..f2ccb71f 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -40,10 +40,10 @@ def test_get_cl_dict(): clusters_list = [1, 1, 2, 3, 3, 4, 2] ligands_list = ["int1", "int2", "p53", "00", "int47", "antibody", "dimer"] expected_cl_dict = { - "1": ["int1", "int2"], - "2": ["p53", "dimer"], - "3": ["00", "int47"], - "4": ["antibody"], + 1: ["int1", "int2"], + 2: ["p53", "dimer"], + 3: ["00", "int47"], + 4: ["antibody"], } observed_cl_dict = get_clustering_dict(clusters_list, ligands_list) assert expected_cl_dict, observed_cl_dict From a776885fd9332b801800acf0a65929da1637d44a Mon Sep 17 00:00:00 2001 From: Rodrigo V Honorato Date: Tue, 5 Sep 2023 15:45:05 +0200 Subject: [PATCH 09/11] update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7cfbbbf8..44d162dc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,6 @@ name: ci -on: push +on: pull_request jobs: build: From 2cc1c756c86db54ee5564de459ab71085c221180 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:45:56 +0200 Subject: [PATCH 10/11] update logic in addition of ligand in cluster dict --- src/arctic3d/modules/clustering.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py index 9596d3d8..b7a30448 100755 --- a/src/arctic3d/modules/clustering.py +++ b/src/arctic3d/modules/clustering.py @@ -127,10 +127,8 @@ def get_clustering_dict(clusters, ligands): cl_dict = {} # loop over clusters for cl in range(len(clusters)): - if clusters[cl] not in cl_dict.keys(): - cl_dict[cl] = [ligands[cl]] - else: - cl_dict[cl].append(ligands[cl]) + cluster_members = cl_dict.setdefautl(cl, []) + cluster_members.append(ligands[cl]) log.info(f"Cluster dictionary {cl_dict}") return cl_dict From 7dd88ec574416165775d0550ab21441b4675be5b Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 6 Aug 2024 12:51:21 +0200 Subject: [PATCH 11/11] Update clustering.py --- src/arctic3d/modules/clustering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arctic3d/modules/clustering.py b/src/arctic3d/modules/clustering.py index b7a30448..7686f8c8 100755 --- a/src/arctic3d/modules/clustering.py +++ b/src/arctic3d/modules/clustering.py @@ -127,7 +127,7 @@ def get_clustering_dict(clusters, ligands): cl_dict = {} # loop over clusters for cl in range(len(clusters)): - cluster_members = cl_dict.setdefautl(cl, []) + cluster_members = cl_dict.setdefault(cl, []) cluster_members.append(ligands[cl]) log.info(f"Cluster dictionary {cl_dict}") return cl_dict