Skip to content

Commit

Permalink
Prepare release v.0.0.5 (#10)
Browse files Browse the repository at this point in the history
* prepare release

* fix conflict version and prepare release

* clean up and prepare release

* test branch before merging

* fix lint format
  • Loading branch information
TieuLongPhan authored Nov 11, 2024
1 parent 479a8a1 commit af0dc37
Show file tree
Hide file tree
Showing 21 changed files with 115 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-and-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name: Test & Lint

on:
push:
branches: [ "main", "dev" ]
branches: [ "main", "dev", "dev-clean"]
pull_request:
branches: [ "main" ]

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ Data/Report/*
Data/Temp/Benchmark/Complete/*
Data/Temp/Benchmark/Hier/*
Data/Temp/Benchmark/Raw/*
*.ipynb
9 changes: 6 additions & 3 deletions Docs/Analysis/_3_cgrtool_aam.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ def parse_reactions(
except StopIteration:
break
elif input_file.endswith(".smi") or input_file.endswith(".smiles"):
with SMILESRead(
input_file, ignore=True, store_log=True, remap=False, header=True
) as ifile, open(input_file) as meta_searcher:
with (
SMILESRead(
input_file, ignore=True, store_log=True, remap=False, header=True
) as ifile,
open(input_file) as meta_searcher,
):
id_tag_position = meta_searcher.readline().strip().split().index(id_tag)
if id_tag_position is None or id_tag_position == 0:
logging.critical(f"No reaction ID tag was found in the header!")
Expand Down
2 changes: 1 addition & 1 deletion Test/SynComp/test_rule_compose.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import unittest
from syntemp.SynComp.rule_compose import RuleCompose
from pathlib import Path
from syntemp.SynUtils.graph_utils import load_gml_as_text
from synutility.SynIO.data_type import load_gml_as_text
from mod import ruleGMLString

root_dir = Path(__file__).parents[2]
Expand Down
15 changes: 15 additions & 0 deletions Test/SynITS/test_its_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,21 @@ def test_parallel_process_smiles(self):
# Inequivalent AAM
self.assertEqual(results_wrong[0]["equivariant"], 0)

def test_unsanitize_smiles(self):
    """Check that the ``sanitize`` flag changes the size of the extracted graph.

    The same atom-mapped reaction (ethene + H2, with explicitly mapped
    hydrogens) is processed twice: once with the default settings and once
    with ``sanitize=False``. The third element of the ``"ITSGraph"`` entry is
    expected to hold 2 nodes in the default run and 4 nodes in the
    unsanitized run.
    """
    mapped_reaction = {
        "R_ID": "R_1",
        "Map": "[CH2:1]=[CH2:2].[H:3][H:4]>>[CH2:1]([H:3])[CH2:2]([H:4])",
    }

    # Default behaviour (sanitize is left at its default value).
    sanitized, _ = ITSExtraction.process_mapped_smiles(mapped_reaction, ["Map"])
    # Same input, but with sanitization explicitly switched off.
    unsanitized, _ = ITSExtraction.process_mapped_smiles(
        mapped_reaction, ["Map"], sanitize=False
    )

    sanitized_size = len(sanitized["ITSGraph"][2].nodes())
    unsanitized_size = len(unsanitized["ITSGraph"][2].nodes())

    # The two runs must disagree, and each must match its known node count.
    self.assertNotEqual(sanitized_size, unsanitized_size)
    self.assertEqual(sanitized_size, 2)
    self.assertEqual(unsanitized_size, 4)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion Test/SynRule/test_hierarchical_clustering.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
from syntemp.SynRule.hierarchical_clustering import HierarchicalClustering
from syntemp.SynUtils.utils import load_from_pickle
from synutility.SynIO.data_type import load_from_pickle


class TestRuleCluster(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion Test/SynRule/test_rule_cluster.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
from syntemp.SynRule.rule_cluster import RuleCluster
from syntemp.SynUtils.utils import load_from_pickle
from synutility.SynIO.data_type import load_from_pickle


class TestRuleCluster(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion Test/SynRule/test_rule_writing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import networkx as nx
from syntemp.SynUtils.utils import load_from_pickle
from synutility.SynIO.data_type import load_from_pickle
from syntemp.SynRule.rule_writing import RuleWriting


Expand Down
5 changes: 3 additions & 2 deletions Test/test_auto_template.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import unittest
from syntemp.auto_template import AutoTemp
from syntemp.SynUtils.utils import load_database
from pathlib import Path
from syntemp.auto_template import AutoTemp
from synutility.SynIO.data_type import load_database


root_dir = Path(__file__).parents[1]

Expand Down
4 changes: 2 additions & 2 deletions Test/test_main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import unittest
import os
import shutil
import unittest
import subprocess
import tempfile
import shutil


class TestCMD(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "syntemp"
version = "0.0.4"
version = "0.0.5"
authors = [
{name="Tieu Long Phan", email="[email protected]"}
]
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ rdkit>=2024.3.3
networkx>=3.3
seaborn>=0.13.2
joblib>=1.3.2
synrbl>=0.0.25
synrbl>=0.0.25
synutility>=0.0.10
3 changes: 2 additions & 1 deletion syntemp/SynAAM/aam_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from operator import eq
from joblib import Parallel, delayed
from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
from synutility.SynIO.Format.mol_to_graph import MolToGraph
from syntemp.SynITS.its_construction import ITSConstruction

from syntemp.SynITS.its_extraction import ITSExtraction
from syntemp.SynChemistry.mol_to_graph import MolToGraph
from syntemp.SynRule.rules_extraction import RuleExtraction
from syntemp.SynUtils.chemutils import enumerate_tautomers, mapping_success_rate
from itertools import combinations
Expand Down
9 changes: 9 additions & 0 deletions syntemp/SynChemistry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import warnings

# Runs when the 'SynChemistry' subpackage is imported: tells users that the
# subpackage is deprecated in favour of the external 'synutility' package.
warnings.warn(
    "The 'SynChemistry' subpackage is deprecated and will be removed in future releases. "
    "Please migrate to the 'synutility' package as soon as possible,"
    " which offers enhanced functionality. "
    "You can install it directly using pip: `pip install synutility`.",
    FutureWarning,
    # Attribute the warning to the code that imports this subpackage rather
    # than to this __init__ module, so users can locate the import to migrate.
    stacklevel=2,
)
39 changes: 31 additions & 8 deletions syntemp/SynITS/its_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from joblib import Parallel, delayed
from operator import eq
from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
from synutility.SynIO.debug import setup_logging
from synutility.SynIO.Format.mol_to_graph import MolToGraph
from synutility.SynChem.Reaction.standardize import Standardize
from syntemp.SynITS.its_construction import ITSConstruction
from syntemp.SynChemistry.mol_to_graph import MolToGraph
from syntemp.SynRule.rules_extraction import RuleExtraction
from syntemp.SynUtils.chemutils import remove_atom_mapping
from syntemp.SynUtils.utils import setup_logging


logger = setup_logging()

Expand All @@ -19,18 +20,19 @@ def __init__(self):
pass

@staticmethod
def graph_from_smiles(smiles: str) -> nx.Graph:
def graph_from_smiles(smiles: str, sanitize: bool = True) -> nx.Graph:
"""
Constructs a graph representation from a SMILES string.
Parameters:
- smiles (str): A SMILES string representing a molecule or a set of molecules.
- sanitize (bool): Whether to sanitize the molecule(s).
Returns:
- nx.Graph: A graph representation of the molecule(s).
"""

mol = Chem.MolFromSmiles(smiles)
mol = Chem.MolFromSmiles(smiles, sanitize=sanitize)
graph = MolToGraph().mol_to_graph(mol, drop_non_aam=True)
return graph

Expand Down Expand Up @@ -75,6 +77,7 @@ def process_mapped_smiles(
ignore_aromaticity: bool = False,
confident_mapper: str = "graphormer",
symbol: str = ">>",
sanitize: bool = True,
) -> Dict[str, any]:
"""
Processes mapped SMILES strings representing chemical reactions by constructing
Expand All @@ -89,6 +92,15 @@ def process_mapped_smiles(
- mapper_names (List[str]): A list of mapper names to be processed.
- check_method (str): A method to check for isomorphism among the ITS graphs.
Either 'RC' or 'ITS'. Defaults to 'RC'.
- id_column (str): The name of the column in the dataframe that contains the
reaction ID. Defaults to 'R-id'.
- ignore_aromaticity (bool): Whether to ignore aromaticity in the reaction
graphs. Defaults to False.
- confident_mapper (str): The name of the mapper that was used to generate the
reaction graphs. Defaults to 'graphormer'.
- symbol (str): The symbol used to separate reactants and products in the
reaction SMILES string. Defaults to '>>'.
- sanitize (bool): Whether to sanitize the molecule(s).
Returns:
- Dict[str, any]: A dictionary containing graph representations for each reaction
Expand All @@ -109,10 +121,10 @@ def process_mapped_smiles(
reactants_side, products_side = mapped_smiles[mapper].split(symbol)

# Get reactants graph G
G = ITSExtraction.graph_from_smiles(reactants_side)
G = ITSExtraction.graph_from_smiles(reactants_side, sanitize)

# Get products graph H
H = ITSExtraction.graph_from_smiles(products_side)
H = ITSExtraction.graph_from_smiles(products_side, sanitize)

# Construct the ITS graph
ITS = ITSConstruction.ITSGraph(G, H, ignore_aromaticity)
Expand Down Expand Up @@ -165,7 +177,9 @@ def process_mapped_smiles(

# Check if mapper_names is not empty to avoid IndexError
if mapper_names:
if "[O]" in remove_atom_mapping(mapped_smiles[mapper_names[0]]):
if "[O]" in Standardize().remove_atom_mapping(
mapped_smiles[mapper_names[0]]
):
target_dict["ITSGraph"] = graphs_by_map.get(mapper_names[0], None)
target_dict["GraphRules"] = rules_by_map.get(mapper_names[0], None)
else:
Expand All @@ -190,6 +204,7 @@ def parallel_process_smiles(
ignore_aromaticity: bool = False,
confident_mapper: str = "graphormer",
symbol: str = ">>",
sanitize: bool = True,
) -> List[Dict[str, any],]:
"""
Processes a list of mapped SMILES strings in parallel.
Expand All @@ -203,6 +218,13 @@ def parallel_process_smiles(
- verbose (int): The verbosity level of the parallel processing.
- check_method (str): A method to check for isomorphism among the ITS graphs.
Either 'RC' or 'ITS'. Defaults to 'RC'.
- export_full (bool): Whether to export the full results. Defaults to False.
- ignore_aromaticity (bool): Whether to ignore aromaticity in the graph.
Defaults to False.
- confident_mapper (str): The mapper name to use if the check_method is 'RC'.
Defaults to 'graphormer'.
- symbol (str): The symbol used to separate reactants and products in the
reaction SMILES string. Defaults to '>>'.
- sanitize (bool): Whether to sanitize the molecule(s). Defaults to True.
Returns:
- List[Dict[str, any]]: A list of dictionaries containing graph representations
Expand All @@ -218,6 +240,7 @@ def parallel_process_smiles(
ignore_aromaticity,
confident_mapper,
symbol,
sanitize,
)
for mapped_smiles in mapped_smiles_list
)
Expand Down
1 change: 0 additions & 1 deletion syntemp/SynITS/its_hadjuster.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def process_single_graph_data(
hydrogen counts and aromaticity considerations.
"""
graphs = deepcopy(graph_data)
logger.info(f"{graphs}")
react_graph, prod_graph, its = graphs[column]
is_empty_graph_present = any(
(not isinstance(graph, nx.Graph) or graph.number_of_nodes() == 0)
Expand Down
19 changes: 12 additions & 7 deletions syntemp/SynRule/hierarchical_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@
add_child_ids,
get_descriptors,
)
import logging

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
from synutility.SynIO.debug import setup_logging

logger = setup_logging()

# import logging

# logging.basicConfig(
# level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
# )


class HierarchicalClustering(RuleCluster):
Expand Down Expand Up @@ -222,8 +227,8 @@ def fit(
reaction_dicts = copy.deepcopy(original_reaction_dicts)
its_graphs = [value[its_column] for value in reaction_dicts]

logging.info("Processing with templates")
logging.info("Parent level")
logger.info("Processing with templates")
logger.info("Parent level")
cluster_indices_0, templates_0 = self.process_level(
its_graphs,
0,
Expand All @@ -239,7 +244,7 @@ def fit(
parent_cluster_indices = cluster_indices_0
for k in self.radius:
if k > 0:
logging.info(f"Child level with radius {k}")
logger.info(f"Child level with radius {k}")
cluster_indices_k, templates_k = self.process_child_level(
its_graphs,
parent_cluster_indices,
Expand Down
9 changes: 9 additions & 0 deletions syntemp/SynUtils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import warnings

# Runs when the 'SynUtils' subpackage is imported: tells users that the
# subpackage is deprecated in favour of the external 'synutility' package.
warnings.warn(
    "The 'SynUtils' subpackage is deprecated and will be removed in future releases. "
    "Please migrate to the 'synutility' package as soon as possible,"
    " which offers enhanced functionality. "
    "You can install it directly using pip: `pip install synutility`.",
    FutureWarning,
    # Attribute the warning to the code that imports this subpackage rather
    # than to this __init__ module, so users can locate the import to migrate.
    stacklevel=2,
)
9 changes: 9 additions & 0 deletions syntemp/SynVis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import warnings

# Runs when the 'SynVis' subpackage is imported: tells users that the
# subpackage is deprecated in favour of the external 'synutility' package.
warnings.warn(
    "The 'SynVis' subpackage is deprecated and will be removed in future releases. "
    "Please migrate to the 'synutility' package as soon as possible,"
    " which offers enhanced functionality. "
    "You can install it directly using pip: `pip install synutility`.",
    FutureWarning,
    # Attribute the warning to the code that imports this subpackage rather
    # than to this __init__ module, so users can locate the import to migrate.
    stacklevel=2,
)
4 changes: 2 additions & 2 deletions syntemp/auto_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
from syntemp.SynUtils.utils import (
prune_branches,
reindex_data,
save_database,
setup_logging,
)
from synutility.SynIO.data_type import save_database
from synutility.SynIO.debug import setup_logging


class AutoTemp:
Expand Down
9 changes: 6 additions & 3 deletions syntemp/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
import shutil
import pandas as pd
from typing import List, Any, Dict, Optional, Union, Tuple
from syntemp.SynChemistry.neutralize import Neutralize
from syntemp.SynChemistry.deionize import Deionize
from synutility.SynChem.Reaction.neutralize import Neutralize
from synutility.SynChem.Reaction.deionize import Deionize
from synutility.SynIO.data_type import save_to_pickle, collect_data
from synutility.SynIO.debug import setup_logging

from syntemp.SynAAM.atom_map_consensus import AAMConsensus
from syntemp.SynITS.its_extraction import ITSExtraction
from syntemp.SynITS.its_hadjuster import ITSHAdjuster
from syntemp.SynITS.its_refinement import ITSRefinement
from syntemp.SynRule.hierarchical_clustering import HierarchicalClustering
from syntemp.SynRule.rule_writing import RuleWriting
from syntemp.SynUtils.utils import save_to_pickle, collect_data, setup_logging

from synrbl import Balancer


Expand Down

0 comments on commit af0dc37

Please sign in to comment.