Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
VGPReys committed Aug 29, 2023
1 parent b26dcc1 commit 08e89e5
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 15 deletions.
36 changes: 29 additions & 7 deletions src/arctic3d/cli_resclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
`linkage` : the linkage strategy.
`criterion` : the criterion to extract the clusters.
`output` : the path where to output clusters data.
"""
import argparse
import os
import sys

import MDAnalysis as mda
Expand All @@ -36,6 +39,7 @@
get_clustering_dict,
)
from arctic3d.modules.input import Input
from arctic3d.modules.output import create_output_folder


argument_parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -88,6 +92,13 @@
"--chain", help="Segment ID to be considered", required=False
)

# Optional destination path for the clusters data. When the option is
# omitted, the clusters are only logged and no Clusters.json is written.
argument_parser.add_argument(
"--output",
help="Path to the generated output dictionary",
type=str,
required=False,
)


def load_args(arguments):
"""
Expand Down Expand Up @@ -128,7 +139,7 @@ def maincli():
cli(argument_parser, main)


def main(input_arg, residue_list, chain, threshold, linkage, criterion):
def main(input_arg, residue_list, chain, threshold, linkage, criterion, output):
"""Main function."""
log.setLevel("INFO")

Expand Down Expand Up @@ -187,14 +198,25 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion):
)

cl_dict = get_clustering_dict(clusters, unique_sorted_resids)
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

else:
log.info("Only one residue, no clustering performed.")
log.info(f"cluster 1, residues {unique_sorted_resids[0]}")
# fake cluster dict with only one entry
cl_dict = {1: unique_sorted_resids}

# log data
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

# check if data must be flushed to output file
if output:
output_basepath = create_output_folder(output, uniprot_id='resclust')
log.info(f'writing clusters data in "{output_basepath}/Clusters.json"')
with open(f'{output_basepath}/Clusters.json', 'w') as filout:
filout.write(str(cl_dict).replace("'", '"'))


if __name__ == "__main__":
Expand Down
10 changes: 6 additions & 4 deletions src/arctic3d/modules/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import time
import json

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -127,11 +128,12 @@ def get_clustering_dict(clusters, ligands):
cl_dict = {}
# loop over clusters
for cl in range(len(clusters)):
if clusters[cl] not in cl_dict.keys():
cl_dict[clusters[cl]] = [ligands[cl]]
if (strcl := str(clusters[cl])) not in cl_dict.keys():
cl_dict[strcl] = [ligands[cl]]
else:
cl_dict[clusters[cl]].append(ligands[cl])
log.info(f"Cluster dictionary {cl_dict}")
cl_dict[strcl].append(ligands[cl])
strdict = str(cl_dict).replace("'", '"')
log.info(f"Cluster dictionary {strdict}")
return cl_dict


Expand Down
22 changes: 22 additions & 0 deletions tests/test_cli_resclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import pytest

import os
import shutil

from arctic3d.cli_resclust import main

from . import golden_data
Expand All @@ -21,6 +24,7 @@ def test_resclust_cli(example_pdbpath):
7.0,
"average",
"distance",
None,
)


Expand All @@ -33,6 +37,7 @@ def test_wrong_residue_list(example_pdbpath):
9.0,
"average",
"distance",
None,
)
assert e.type == SystemExit
assert e.value.code == 1
Expand All @@ -46,4 +51,21 @@ def test_resclust_maxclust(example_pdbpath):
2,
"average",
"maxclust",
None,
)


def test_resclust_genoutput(example_pdbpath):
    """Check that giving an output path makes `main` write Clusters.json.

    The output directory is removed in a `finally` clause so that a failing
    assertion (or an early exit from `main`) does not leave stale data
    behind for subsequent test runs.
    """
    output_dir = "resclustout"
    try:
        main(
            example_pdbpath,
            "100,101,102,133,134,135",
            None,
            2,
            "average",
            "maxclust",
            output_dir,
        )
        assert os.path.exists(output_dir)
        assert os.path.exists(os.path.join(output_dir, "Clusters.json"))
    finally:
        # ignore_errors: the directory may be missing if main() failed early
        shutil.rmtree(output_dir, ignore_errors=True)

8 changes: 4 additions & 4 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def test_get_cl_dict():
clusters_list = [1, 1, 2, 3, 3, 4, 2]
ligands_list = ["int1", "int2", "p53", "00", "int47", "antibody", "dimer"]
expected_cl_dict = {
1: ["int1", "int2"],
2: ["p53", "dimer"],
3: ["00", "int47"],
4: ["antibody"],
"1": ["int1", "int2"],
"2": ["p53", "dimer"],
"3": ["00", "int47"],
"4": ["antibody"],
}
observed_cl_dict = get_clustering_dict(clusters_list, ligands_list)
# Compare the dictionaries explicitly: `assert a, b` only checks that `a`
# is truthy and uses `b` as the failure message, so it could never fail.
assert expected_cl_dict == observed_cl_dict
Expand Down

0 comments on commit 08e89e5

Please sign in to comment.