Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
TieuLongPhan committed Nov 11, 2024
2 parents b847438 + 8c17232 commit bb742fa
Show file tree
Hide file tree
Showing 50 changed files with 3,135 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-and-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name: Test & Lint

on:
push:
branches: [ "main", "dev" ]
branches: [ "dev" ]
pull_request:
branches: [ "main" ]

Expand Down
7 changes: 1 addition & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,5 @@
*.csv
*/catboost_info/*
*.ipynb
test.py
rebalance_test.py
split_comparison.py
fp.py

*.json
split_benchmark_process.py
test_mod.py
Binary file added Data/test.pkl.gz
Binary file not shown.
Empty file.
90 changes: 90 additions & 0 deletions Test/SynChem/Molecule/test_standardize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import unittest
from rdkit import Chem
from synutility.SynChem.Molecule.standardize import (
normalize_molecule,
canonicalize_tautomer,
salts_remover,
uncharge_molecule,
fragments_remover,
remove_explicit_hydrogens,
remove_radicals_and_add_hydrogens,
remove_isotopes,
clear_stereochemistry,
)


class TestMoleculeFunctions(unittest.TestCase):
    """Unit tests for the molecule helpers imported from ``standardize``.

    Every test parses a small SMILES string with RDKit, applies exactly one
    helper, and compares the SMILES written back by ``Chem.MolToSmiles`` (or
    per-atom properties) with the expected outcome.
    """

    def test_normalize_molecule(self):
        # Input carries a charge-separated S(+2)/O(-) group; the expected
        # output writes it as S(=O)=O.
        smi = "[Na]OC(=O)c1ccc(C[S+2]([O-])([O-]))cc1"
        expect = "O=C(O[Na])c1ccc(C[S](=O)=O)cc1"
        mol = Chem.MolFromSmiles(smi)
        normalized_mol = normalize_molecule(mol)
        self.assertIsInstance(normalized_mol, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(normalized_mol))

    def test_canonicalize_tautomer(self):
        # The imine form is expected to come back as the amino tautomer.
        smi = "N=c1[nH]cc[nH]1"
        expect = "Nc1ncc[nH]1"
        mol = Chem.MolFromSmiles(smi)
        tautomer = canonicalize_tautomer(mol)
        self.assertIsInstance(tautomer, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(tautomer))

    def test_salts_remover(self):
        # The sodium counter-ion must be dropped, leaving only the organic part.
        smi = "CC(=O).[Na+]"
        expect = "CC=O"
        mol = Chem.MolFromSmiles(smi)
        remover = salts_remover(mol)
        self.assertIsInstance(remover, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(remover))

    def test_uncharge_molecule(self):
        # Acetate anion in, neutral acetic acid out.
        smi = "CC(=O)[O-]"
        expect = "CC(=O)O"
        mol = Chem.MolFromSmiles(smi)
        uncharged_mol = uncharge_molecule(mol)
        self.assertIsInstance(uncharged_mol, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(uncharged_mol))

    def test_fragments_remover(self):
        # Only the acetate fragment is expected to remain, charge untouched.
        smi = "CC(=O)[O-].[Na+]"
        expect = "CC(=O)[O-]"
        mol = Chem.MolFromSmiles(smi)
        remover = fragments_remover(mol)
        self.assertIsInstance(remover, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(remover))

    def test_remove_explicit_hydrogens(self):
        # "[CH4]" spells out the hydrogens; the result must serialise as "C".
        smi = "[CH4]"
        expect = "C"
        mol = Chem.MolFromSmiles(smi)
        remover = remove_explicit_hydrogens(mol)
        self.assertIsInstance(remover, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(remover))

    def test_remove_radicals(self):
        # "[CH3]" is a methyl radical; the helper is expected to yield methane.
        smi = "[CH3]"
        expect = "C"
        mol = Chem.MolFromSmiles(smi)
        remover = remove_radicals_and_add_hydrogens(mol)
        self.assertIsInstance(remover, Chem.Mol)
        self.assertEqual(expect, Chem.MolToSmiles(remover))

    def test_remove_isotopes(self):
        # Molecule with isotopic labeling
        smiles = "[13CH3]C([2H])([2H])[17O][18OH]"
        expect = "[H]C([H])(C)OO"
        mol = Chem.MolFromSmiles(smiles)
        result_mol = remove_isotopes(mol)
        # Every atom must report isotope 0 once the labels are stripped.
        for atom in result_mol.GetAtoms():
            self.assertEqual(atom.GetIsotope(), 0, "Isotopes not properly removed")
        self.assertEqual(Chem.MolToSmiles(result_mol), expect)

    def test_clear_stereochemistry(self):
        # Molecule with defined stereochemistry
        smiles = "C[C@H](O)[C@@H](O)C"
        mol = Chem.MolFromSmiles(smiles)
        result_mol = clear_stereochemistry(mol)
        has_stereo = any(atom.HasProp("_CIPCode") for atom in result_mol.GetAtoms())
        self.assertFalse(has_stereo, "Stereochemistry not properly cleared")
        # "_CIPCode" is only an annotation on the atom. Also make sure no
        # chirality mark survives in the SMILES written for the result, so a
        # molecule that merely lost its labels cannot pass.
        self.assertNotIn(
            "@",
            Chem.MolToSmiles(result_mol),
            "Stereochemistry not properly cleared",
        )
26 changes: 26 additions & 0 deletions Test/SynChem/Reaction/test_cleanning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import unittest
from synutility.SynChem.Reaction.cleanning import Cleanning


class TestCleaning(unittest.TestCase):
    """Tests for the ``Cleanning`` helper applied to lists of reaction SMILES."""

    def setUp(self):
        # Each test works on its own freshly constructed cleaner.
        self.cleaner = Cleanning()

    def test_remove_duplicates(self):
        # Two identical reaction strings must be reduced to a single entry.
        deduplicated = self.cleaner.remove_duplicates(["CC>>CC", "CC>>CC"])
        self.assertEqual(
            deduplicated, ["CC>>CC"], "Failed to remove duplicates correctly"
        )

    def test_clean_smiles(self):
        reactions = ["CC>>CC", "CC>>CC", "CC>>CCC"]
        cleaned = self.cleaner.clean_smiles(reactions)
        # Assuming 'CC>>CCC' is not balanced, only the de-duplicated 'CC>>CC'
        # is expected to remain.
        self.assertEqual(cleaned, ["CC>>CC"], "Failed to clean SMILES correctly")


if __name__ == "__main__":
    # Executed directly as a script: delegate to unittest's command-line runner.
    unittest.main()
Empty file.
195 changes: 195 additions & 0 deletions Test/SynGraph/Descriptor/test_graph_descriptors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import unittest
import networkx as nx
from synutility.SynIO.data_type import load_from_pickle
from synutility.SynGraph.Descriptor.graph_descriptors import GraphDescriptor


class TestGraphDescriptor(unittest.TestCase):
    """Tests for the static helpers exposed by ``GraphDescriptor``.

    The topology tests use small networkx generator graphs; the descriptor
    tests use a hand-built four-node graph (N, H, C, Br) and a pickled
    fixture for the parallel entry point.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Only test_get_descriptors_parallel consumes this fixture, so it is
        # read once per class instead of once before every test.
        # NOTE(review): the path is relative, so this presumably relies on the
        # tests being launched from the repository root - confirm.
        cls.data_parallel = load_from_pickle("Data/test.pkl.gz")

    def setUp(self):
        # Creating different types of graphs
        self.acyclic_graph = nx.balanced_tree(
            r=2, h=3
        )  # Creates a balanced binary tree, which is acyclic
        self.single_cyclic_graph = nx.cycle_graph(5)  # Creates a cycle with 5 nodes
        self.complex_cyclic_graph = (
            nx.house_x_graph()
        )  # Known small graph with multiple cycles
        self.empty_graph = nx.Graph()  # Empty graph for testing

        # Set up the graph
        self.graph = nx.Graph()
        self.graph.add_node(
            11,
            element="N",
            charge=0,
            hcount=1,
            aromatic=False,
            atom_map=11,
            isomer="N",
            partial_charge=-0.313,
            hybridization="SP3",
            in_ring=True,
            explicit_valence=3,
            implicit_hcount=0,
            neighbors=["C", "C"],
        )
        # NOTE(review): this hydrogen reuses atom_map=11 and has no
        # "neighbors" attribute, unlike the other nodes - confirm that this
        # is intentional. The assertions below do not depend on either.
        self.graph.add_node(
            35,
            element="H",
            charge=0,
            hcount=0,
            aromatic=False,
            atom_map=11,
            isomer="N",
            partial_charge=0,
            hybridization="0",
            in_ring=False,
            explicit_valence=0,
            implicit_hcount=0,
        )
        self.graph.add_node(
            28,
            element="C",
            charge=0,
            hcount=0,
            aromatic=True,
            atom_map=28,
            isomer="N",
            partial_charge=0.063,
            hybridization="SP2",
            in_ring=True,
            explicit_valence=4,
            implicit_hcount=0,
            neighbors=["Br", "C", "C"],
        )
        self.graph.add_node(
            29,
            element="Br",
            charge=0,
            hcount=0,
            aromatic=False,
            atom_map=29,
            isomer="N",
            partial_charge=-0.047,
            hybridization="SP3",
            in_ring=False,
            explicit_valence=1,
            implicit_hcount=0,
            neighbors=["C"],
        )

        # Adding edges with their attributes; together they close the
        # four-membered cycle 11-35-29-28-11.
        self.graph.add_edge(11, 35, order=(1.0, 0), standard_order=1.0)
        self.graph.add_edge(11, 28, order=(0, 1.0), standard_order=-1.0)
        self.graph.add_edge(35, 29, order=(0, 1.0), standard_order=-1.0)
        self.graph.add_edge(28, 29, order=(1.0, 0), standard_order=1.0)
        # Prepare the data dictionary
        self.data = {"RC": self.graph, "ITS": self.graph}

    def test_is_acyclic_graph(self):
        self.assertTrue(GraphDescriptor.is_acyclic_graph(self.acyclic_graph))
        self.assertFalse(GraphDescriptor.is_acyclic_graph(self.single_cyclic_graph))
        self.assertFalse(GraphDescriptor.is_acyclic_graph(self.complex_cyclic_graph))
        # The empty graph is expected to be reported as not acyclic.
        self.assertFalse(GraphDescriptor.is_acyclic_graph(self.empty_graph))

    def test_is_single_cyclic_graph(self):
        self.assertFalse(GraphDescriptor.is_single_cyclic_graph(self.acyclic_graph))
        self.assertTrue(
            GraphDescriptor.is_single_cyclic_graph(self.single_cyclic_graph)
        )
        self.assertFalse(
            GraphDescriptor.is_single_cyclic_graph(self.complex_cyclic_graph)
        )
        self.assertFalse(GraphDescriptor.is_single_cyclic_graph(self.empty_graph))

    def test_is_complex_cyclic_graph(self):
        self.assertFalse(GraphDescriptor.is_complex_cyclic_graph(self.acyclic_graph))
        self.assertFalse(
            GraphDescriptor.is_complex_cyclic_graph(self.single_cyclic_graph)
        )
        self.assertTrue(
            GraphDescriptor.is_complex_cyclic_graph(self.complex_cyclic_graph)
        )
        self.assertFalse(GraphDescriptor.is_complex_cyclic_graph(self.empty_graph))

    def test_check_graph_type(self):
        self.assertEqual(
            GraphDescriptor.check_graph_type(self.acyclic_graph), "Acyclic"
        )
        self.assertEqual(
            GraphDescriptor.check_graph_type(self.single_cyclic_graph), "Single Cyclic"
        )
        self.assertEqual(
            GraphDescriptor.check_graph_type(self.complex_cyclic_graph),
            "Combinatorial Cyclic",
        )
        self.assertEqual(
            GraphDescriptor.check_graph_type(self.empty_graph), "Empty Graph"
        )

    def test_get_cycle_member_rings(self):
        self.assertEqual(GraphDescriptor.get_cycle_member_rings(self.acyclic_graph), [])
        self.assertEqual(
            GraphDescriptor.get_cycle_member_rings(self.single_cyclic_graph), [5]
        )
        self.assertEqual(
            GraphDescriptor.get_cycle_member_rings(self.complex_cyclic_graph),
            [3, 3, 3, 3],
        )
        self.assertEqual(GraphDescriptor.get_cycle_member_rings(self.empty_graph), [])

    def test_get_element_count(self):
        # Expected results: one atom of each element in the hand-built graph
        expected_element_count = {"N": 1, "H": 1, "C": 1, "Br": 1}

        # Test get_element_count
        self.assertEqual(
            GraphDescriptor.get_element_count(self.graph), expected_element_count
        )

    def test_get_descriptors(self):
        # Expected output after processing. "RC" and "rtype" are listed for
        # reference only; they are not asserted below.
        expected_output = {
            "RC": self.graph,
            "topo": "Single Cyclic",  # the hand-built graph is one ring
            "cycle": [4],  # sizes of the rings found: one four-membered ring
            "atom_count": {"N": 1, "H": 1, "C": 1, "Br": 1},
            "rtype": "Elementary",  # Expected reaction type
            "rstep": 1,  # expected step count for this graph
        }

        # Run the descriptor function
        results = GraphDescriptor.get_descriptors(self.data, "RC")
        self.assertEqual(results["topo"], expected_output["topo"])
        self.assertEqual(results["cycle"], expected_output["cycle"])
        self.assertEqual(results["rstep"], expected_output["rstep"])
        self.assertEqual(results["atom_count"], expected_output["atom_count"])

    def test_get_descriptors_parallel(self):
        # Expected output for the first entry of the pickled fixture. "RC" and
        # "rtype" are listed for reference only; they are not asserted below.
        expected_output = {
            "RC": self.graph,
            "topo": "Single Cyclic",
            "cycle": [4],
            "atom_count": {"N": 1, "H": 1, "C": 1, "Br": 1},
            "rtype": "Elementary",
            "rstep": 1,
        }

        # Run the descriptor function
        results = GraphDescriptor.process_entries_in_parallel(
            self.data_parallel, "GraphRules", "ITSGraph", n_jobs=4
        )
        self.assertEqual(results[0]["topo"], expected_output["topo"])
        self.assertEqual(results[0]["cycle"], expected_output["cycle"])
        self.assertEqual(results[0]["rstep"], expected_output["rstep"])
        self.assertEqual(results[0]["atom_count"], expected_output["atom_count"])


if __name__ == "__main__":
    # Executed directly as a script: delegate to unittest's command-line runner.
    unittest.main()
52 changes: 52 additions & 0 deletions Test/SynGraph/Descriptor/test_graph_signature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest
from synutility.SynIO.data_type import load_from_pickle
from synutility.SynGraph.Descriptor.graph_signature import GraphSignature


class TestGraphSignature(unittest.TestCase):
    """Tests for the signature strings produced by ``GraphSignature``."""

    def setUp(self):
        # Both graphs come from the first entry of the pickled fixture; index 2
        # of the "GraphRules" and "ITSGraph" values is used in each case.
        first_entry = load_from_pickle("Data/test.pkl.gz")[0]
        self.rc = first_entry["GraphRules"][2]
        self.its = first_entry["ITSGraph"][2]

    def test_create_topology_signature(self):
        topo_signature = GraphSignature(self.rc).create_topology_signature(
            topo="Single Cyclic", cycle=[4], rstep=1
        )
        self.assertEqual(topo_signature, "114")

    def test_create_node_signature(self):
        node_signature = GraphSignature(self.rc).create_node_signature()
        self.assertEqual(node_signature, "BrCHN")

    def test_create_node_signature_condensed(self):
        # Same method, but applied to the ITS graph instead of the RC graph.
        node_signature = GraphSignature(self.its).create_node_signature()
        self.assertEqual(node_signature, "BrC{23}ClHN{3}O{5}S")

    def test_create_edge_signature(self):
        edge_signature = GraphSignature(self.rc).create_edge_signature()
        self.assertEqual(edge_signature, "Br[-1]H/Br[1]C/C[-1]N/H[1]N")

    def test_create_graph_signature(self):
        # The full signature is expected to be the topology, node and edge
        # signatures joined with dots, in that order.
        signature = GraphSignature(self.rc)
        expected = ".".join(["114", "BrCHN", "Br[-1]H/Br[1]C/C[-1]N/H[1]N"])
        graph_signature = signature.create_graph_signature(
            topo="Single Cyclic", cycle=[4], rstep=1
        )
        self.assertEqual(graph_signature, expected)


# Running the tests
if __name__ == "__main__":
    # Executed directly as a script: delegate to unittest's command-line runner.
    unittest.main()
Empty file.
Loading

0 comments on commit bb742fa

Please sign in to comment.