From 4d7542afdf04a8d2eb06c38182bd3dee7b71906c Mon Sep 17 00:00:00 2001 From: muddymudskipper Date: Fri, 21 Jun 2024 09:19:19 +0100 Subject: [PATCH] rename main plugin py file, add icon --- README.md | 2 +- cmem_plugin_robotreason/__init__.py | 216 +---------------- cmem_plugin_robotreason/obofoundry.png | Bin 0 -> 2319 bytes cmem_plugin_robotreason/plugin_robotreason.py | 218 ++++++++++++++++++ tests/test_robotreason.py | 2 +- 5 files changed, 221 insertions(+), 217 deletions(-) create mode 100644 cmem_plugin_robotreason/obofoundry.png create mode 100644 cmem_plugin_robotreason/plugin_robotreason.py diff --git a/README.md b/README.md index 09fb658..e99f99b 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ include = ["cmem_plugin_robotreason/bin/*"] ``` -:bulb: Prior to the build process, the Java library _robot.jar_ (v1.9.4) and the _robot.sh_ script are automatically +:bulb: Prior to the build process, the Java library _robot.jar_ (v1.9.6) and the _robot.sh_ script are automatically downloaded from the [ROBOT GitHub repository](https://github.com/ontodev/robot). The _jar_ file is not downloaded if it already exists in the same version. The ROBOT files are downloaded to the directory _cmem_plugin_robotreason/workflow/bin_ and are not removed automatically when running `task clean`. The files can be diff --git a/cmem_plugin_robotreason/__init__.py b/cmem_plugin_robotreason/__init__.py index 2425f4c..41d8786 100644 --- a/cmem_plugin_robotreason/__init__.py +++ b/cmem_plugin_robotreason/__init__.py @@ -1,215 +1 @@ -"""Reasoning with robot plugin module""" - -import re -import shlex -import unicodedata -from collections import OrderedDict -from collections.abc import Sequence -from datetime import UTC, datetime -from pathlib import Path -from subprocess import run # nosec -from time import time -from uuid import uuid4 -from xml.etree.ElementTree import ( - Element, - SubElement, - tostring, -) - -from cmem.cmempy.dp.proxy.graph import get, get_graph_import_tree, post_streamed -from cmem_plugin_base.dataintegration.context import ExecutionContext -from cmem_plugin_base.dataintegration.description import Plugin, PluginParameter -from cmem_plugin_base.dataintegration.entity import Entities -from cmem_plugin_base.dataintegration.parameter.choice import ChoiceParameterType -from cmem_plugin_base.dataintegration.parameter.graph import GraphParameterType -from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin -from cmem_plugin_base.dataintegration.types import StringParameterType -from cmem_plugin_base.dataintegration.utils import setup_cmempy_user_access -from defusedxml import minidom - -ROBOT = Path(__path__[0]) / "bin" / "robot" - - -def convert_iri_to_filename(value: str) -> str: - """Convert IRI to filename""" - value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii") - value = re.sub(r"\.", "_", value.lower()) - value = re.sub(r"/", "_", value.lower()) - value = re.sub(r"[^\w\s-]", "", value.lower()) - value = re.sub(r"[-\s]+", "-", value).strip("-_") - return value + ".nt" - - -@Plugin( - label="Reasoning with ROBOT", - description="Given a data and an ontology grpah, this task performs reasoning " "using ROBOT.", - documentation="""A task performing reasoning using ROBOT (ROBOT is an OBO Tool). - It takes an OWL ontology and a data graph as inputs and writes the reasoning result - to a specified graph. The following reasoner options are supported: ELK, Expression - Materializing Reasoner, HermiT, JFact, Structural Reasoner and Whelk.""", - parameters=[ - PluginParameter( - param_type=GraphParameterType( - classes=[ - "http://www.w3.org/2002/07/owl#Ontology", - "https://vocab.eccenca.com/di/Dataset", - "http://rdfs.org/ns/void#Dataset", - ] - ), - name="data_graph_iri", - label="Data graph IRI", - description="The IRI of the input data graph.", - ), - PluginParameter( - param_type=GraphParameterType(classes=["http://www.w3.org/2002/07/owl#Ontology"]), - name="ontology_graph_iri", - label="Ontology_graph_IRI", - description="The IRI of the input ontology graph.", - ), - PluginParameter( - param_type=StringParameterType(), - name="result_iri", - label="Result graph IRI", - description="The IRI of the output graph for the reasoning result. " - "WARNING: existing graph will be overwritten!", - ), - PluginParameter( - param_type=ChoiceParameterType( - OrderedDict( - { - "elk": "ELK", - "emr": "Expression Materializing Reasoner", - "hermit": "HermiT", - "jfact": "JFact", - "structural": "Structural Reasoner", - "whelk": "Whelk", - } - ) - ), - name="reasoner", - label="Reasoner", - description="Reasoner option.", - default_value="elk", - ), - ], -) -class RobotReasonPlugin(WorkflowPlugin): - """Robot reasoning plugin""" - - def __init__( - self, data_graph_iri: str, ontology_graph_iri: str, result_iri: str, reasoner: str - ) -> None: - """Init""" - self.data_graph_iri = data_graph_iri - self.ontology_graph_iri = ontology_graph_iri - self.result_iri = result_iri - self.reasoner = reasoner - self.temp = f"robot_{uuid4().hex}" - if not Path(self.temp).exists(): - Path(self.temp).mkdir(parents=True) - - def create_xml_catalog_file(self, graphs: dict) -> None: - """Create XML catalog file""" - file_name = Path(self.temp) / "catalog-v001.xml" - catalog = Element("catalog") - catalog.set("prefer", "public") - catalog.set("xmlns", "urn:oasis:names:tc:entity:xmlns:xml:catalog") - for graph in graphs: - uri = SubElement(catalog, "uri") - uri.set("id", "Auto-generated import resolution by cmem-plugin-robotreason") - uri.set("name", graph) - uri.set("uri", graphs[graph]) - reparsed = minidom.parseString(tostring(catalog, "utf-8")).toxml() # nosec - with Path(file_name).open("w", encoding="utf-8") as file: - file.truncate(0) - file.write(reparsed) - - def get_graphs(self, graphs: dict) -> None: - """Get graphs from CMEM""" - for graph in graphs: - with (Path(self.temp) / graphs[graph]).open("w", encoding="utf-8") as file: - file.write(get(graph).text) - file.write( - f"<{graph}> " - f" " - f" .\n" - ) - - def get_graphs_tree(self) -> dict: - """Get graph import tree""" - graphs = {} - for graph_iri in (self.data_graph_iri, self.ontology_graph_iri): - if graph_iri not in graphs: - graphs[graph_iri] = convert_iri_to_filename(graph_iri) - tree = get_graph_import_tree(graph_iri) - for value in tree["tree"].values(): - for iri in value: - if iri not in graphs: - graphs[iri] = convert_iri_to_filename(iri) - return graphs - - def reason(self, graphs: dict) -> None: - """Reason""" - inputs = "" - for value in graphs.values(): - inputs += f' --input "{self.temp}/{value}"' - utctime = str(datetime.fromtimestamp(int(time()), tz=UTC))[:-6].replace(" ", "T") + "Z" - cmd = ( - f"{ROBOT} merge{inputs} --collapse-import-closure false " - f"reason --reasoner {self.reasoner} " - f'--axiom-generators "ClassAssertion PropertyAssertion" ' - f"--include-indirect true " - f"--exclude-duplicate-axioms true " - f"--exclude-owl-thing true " - f"--exclude-tautologies all " - f"--exclude-external-entities " - f"reduce --reasoner {self.reasoner} " - f"unmerge{inputs} " - f'annotate --ontology-iri "{self.result_iri}" ' - f"--remove-annotations " - f'--language-annotation rdfs:label "Eccenca Reasoning Result {utctime}" en ' - f"--language-annotation rdfs:comment " - f'"Reasoning result set of <{self.data_graph_iri}> and ' - f'<{self.ontology_graph_iri}>" en ' - f"--language-annotation prov:wasGeneratedBy " - f'"cmem-plugin-robotreason ({self.reasoner})" en ' - f'--link-annotation prov:wasDerivedFrom "{self.data_graph_iri}" ' - f"--link-annotation prov:wasDerivedFrom " - f'"{self.ontology_graph_iri}" ' - f'--typed-annotation dc:created "{utctime}" xsd:dateTime ' - f'--output "{self.temp}/result.ttl"' - ) - run(shlex.split(cmd), check=False) # noqa: S603 - - def send_result(self) -> None: - """Send result""" - post_streamed( - self.result_iri, - str(Path(self.temp) / "result.ttl"), - replace=True, - content_type="text/turtle", - ) - - def clean_up(self, graphs: dict) -> None: - """Remove temporary files""" - files = ["catalog-v001.xml", "result.ttl"] - files += list(graphs.values()) - for file in files: - try: - (Path(self.temp) / file).unlink() - except (OSError, FileNotFoundError) as err: - self.log.warning(f"Cannot remove file {file} ({err})") - try: - Path(self.temp).rmdir() - except (OSError, FileNotFoundError) as err: - self.log.warning(f"Cannot remove directory {self.temp} ({err})") - - def execute(self, inputs: Sequence[Entities], context: ExecutionContext) -> None: # noqa: ARG002 - """Execute plugin""" - setup_cmempy_user_access(context.user) - graphs = self.get_graphs_tree() - self.get_graphs(graphs) - self.create_xml_catalog_file(graphs) - self.reason(graphs) - self.send_result() - self.clean_up(graphs) +"""robotreason - main package""" diff --git a/cmem_plugin_robotreason/obofoundry.png b/cmem_plugin_robotreason/obofoundry.png new file mode 100644 index 0000000000000000000000000000000000000000..94ba09733ff817418c2ccdf3b2a69b628c206961 GIT binary patch literal 2319 zcmV+q3GnubP)C00093P)t-s00001 z0RaL60s{jB1Ox;H1qB8M1_uWR2nYxX2?+`c3JVJh3=9kn4Gj(s4i66x5D*X%5fKs+ z5)%^>6ciK{6%`g178e&67#J8C85tTH8XFrM92^`S9UUGX9v>ecARr(iAt53nA|oRs zBqSsyB_$>%CMPE+C@3f?DJd!{Dl021EG#T7EiEoCE-x=HFfcGNF)=bSGBYzXG&D3d zH8nOiHa9mnI5;>tIXOByIy*Z%JUl!-Jv}}?K0iM{KtMo2K|w-7LPJACL_|bIMMXwN zMn^|SNJvOYNl8jdN=r*iOiWBoO-)WtPESuyP*6}&QBhJ-Qd3h?R8&+|RaI72R##V7 zSXfwDSy@_IT3cINTwGjTU0q&YUSD5dU|?WjVPRroVq;@tWMpJzWo2e&W@l$-XlQ6@ zX=!R|YHMq2Y;0_8ZEbFDZf|dIaBy&OadC2Ta&vQYbaZreb#-=jc6WDoczAeud3kzz zdV70&e0+R;eSLm@et&;|fPjF3fq{a8f`fyDgoK2Jg@uNOhKGlTh=_=ZiHVAeii?Yj zjEszpjg5|uj*pLzkdTm(k&%*;l9Q8@l$4Z}m6ev3mY0{8n3$NEnVFiJnwy)OoSdAU zot>VZo}ZteprD|kp`oIpqNAguq@<*!rKP5(rl+T;sHmu^si~@}s;jH3tgNi9t*x%E zuCK4Ju&}VPv9YqUva_?Zw6wIfwY9dkwzs#pxVX5vxw*Q!y1To(yu7@dCU$jHda$;ryf%FD~k%*@Qq&CSlv&d<-!(9qD) z(b3Y<($mw^)YR0~)z#M4*4Nk9*x1lt)=I7_<=;-L_>FMg~>g((4?Ck9A?d|UF?(gsK@bK{Q@$vHV^7Hfa z^z`)g_4W4l_V@Sq`1ttw`T6?#`uqF){QUg={r&#_{{R2~;;>zP000HbNklN&RlfVnYyfTv-E-gxw*VvI zzo%&ZQxoZzxB(bW9NtLjHxlV7X93hDSPr*Q`g@7CbBX|v9Dh(C8gGZ$t^sal0Eosv z0bs}}K<83MKTl>Sfb0S^UCD?8^bI8ST>*T;QBSB<1g>5u09*O`WcZVz>$npDuKJEL zBI0~rr&t51SDk(j|EgF6;OZ|58*|TzMujzi7QqOB;oeo$@BT?{049q7DBGWI{L26^ z^4D2VPXn;fj=nK~EqzmfcWmh!0_3de3jh|%_3g>)O++sAMF7jQ29S1vjrm6ja3bp- zkafZSq5#N7L7^A`_elU26azpu3a`l;0IISEfC2Xa?@0l^a1S8wg2t=?>`g|(N}J^X z!_EO_$^qa(*8nRN08r`}!23o;U}qf2swfFs-2%*40szdfD}_580W=`=LmPDf(02u1 zAJU8mQRaYHYxiP~^ZI9YouMo2!rd=KtK#*Dll|+Z&@+B#7;P=1?hvr@&NKk+m9tR2 zT^l(UA3i%aHhkIS6~Xt5jj(CFc$qPP_bc*pAM>U?4JSPB_f;1FOnozmC~3K;$uIu3 zIlyWp0ByOse*2yvsBmkyIe=K7&iDo3!#>J$bW-6+@aS+M=5Ip~(}4jP`|8}-xnOhu zKau~HAZ7zhH3yL0isGOd$(AY0I>V|P#>eO5o#p`dN&rUUT)vex4+aPA2z?Pixdfmk z3i`h!$hQV_fF+W$@fU(=DgewBEr_VV75zbWseCK{!w8^9OHi((8@Bk`( zUJ@#(r~t^m>A@HEFM6o0RS6=S`YC77HI?ghNHv+DD@L(H;V2j9DrIsaaR$? zb(jFCM*zUvNPyQ}^#C=;9TUePwmF#ryi3M!6URMb0-#t6_(pDF`&bf(*k{E8AoPg= zu6m!jT^rzGD!-clka#~E!mzb_BrZIso+7%I}Dl17ySH zkyp6ehOoUl0CXzZ(#36g9|6#22O!67`P4GN5h|0qmF2kriVsV^DQ?LWz-(a$BswQ@ zHX{HOyA41nG6w-5d8I8l4-J4`1OR{2oh5)|Ag`^?F2F%D?}bF*bh0seSE>BArw6cB zmiGbK7)?OCIGRxDCdT5A*yZ-T)j2{h_~LSxuGLu*+@!07o$ZJ@$JbT4GlVV=I`lRso{_PjONhTSL}8=rL=!lmOA* zt<}OW=iJ!62^5Gf}5XaHh_2_GjPa0oDZH6mxDLjd}*6!M<8t+uAi ztG^WBqMDtx-wqVu=kOB9cN&@kY%w{x^yUNj_m%c+0RHx(752Vt0NUeAa3~vqrWioB pm;ApBuqg)U&jz5?`!@gp|Nkw85Zft&;5YyP002ovPDHLkV1fu}Irjhn literal 0 HcmV?d00001 diff --git a/cmem_plugin_robotreason/plugin_robotreason.py b/cmem_plugin_robotreason/plugin_robotreason.py new file mode 100644 index 0000000..6b221a4 --- /dev/null +++ b/cmem_plugin_robotreason/plugin_robotreason.py @@ -0,0 +1,218 @@ +"""Reasoning with robot plugin module""" + +import re +import shlex +import unicodedata +from collections import OrderedDict +from collections.abc import Sequence +from datetime import UTC, datetime +from pathlib import Path +from subprocess import run # nosec +from time import time +from uuid import uuid4 +from xml.etree.ElementTree import ( + Element, + SubElement, + tostring, +) + +from cmem.cmempy.dp.proxy.graph import get, get_graph_import_tree, post_streamed +from cmem_plugin_base.dataintegration.context import ExecutionContext +from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter +from cmem_plugin_base.dataintegration.entity import Entities +from cmem_plugin_base.dataintegration.parameter.choice import ChoiceParameterType +from cmem_plugin_base.dataintegration.parameter.graph import GraphParameterType +from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin +from cmem_plugin_base.dataintegration.types import StringParameterType +from cmem_plugin_base.dataintegration.utils import setup_cmempy_user_access +from defusedxml import minidom + +from . import __path__ + +ROBOT = Path(__path__[0]) / "bin" / "robot" + + +def convert_iri_to_filename(value: str) -> str: + """Convert IRI to filename""" + value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii") + value = re.sub(r"\.", "_", value.lower()) + value = re.sub(r"/", "_", value.lower()) + value = re.sub(r"[^\w\s-]", "", value.lower()) + value = re.sub(r"[-\s]+", "-", value).strip("-_") + return value + ".nt" + + +@Plugin( + label="Reasoning with ROBOT", + icon=Icon(file_name="obofoundry.png", package=__package__), + description="Given a data and an ontology grpah, this task performs reasoning " "using ROBOT.", + documentation="""A task performing reasoning using ROBOT (ROBOT is an OBO Tool). + It takes an OWL ontology and a data graph as inputs and writes the reasoning result + to a specified graph. The following reasoner options are supported: ELK, Expression + Materializing Reasoner, HermiT, JFact, Structural Reasoner and Whelk.""", + parameters=[ + PluginParameter( + param_type=GraphParameterType( + classes=[ + "http://www.w3.org/2002/07/owl#Ontology", + "https://vocab.eccenca.com/di/Dataset", + "http://rdfs.org/ns/void#Dataset", + ] + ), + name="data_graph_iri", + label="Data graph IRI", + description="The IRI of the input data graph.", + ), + PluginParameter( + param_type=GraphParameterType(classes=["http://www.w3.org/2002/07/owl#Ontology"]), + name="ontology_graph_iri", + label="Ontology_graph_IRI", + description="The IRI of the input ontology graph.", + ), + PluginParameter( + param_type=StringParameterType(), + name="result_iri", + label="Result graph IRI", + description="The IRI of the output graph for the reasoning result. " + "WARNING: existing graph will be overwritten!", + ), + PluginParameter( + param_type=ChoiceParameterType( + OrderedDict( + { + "elk": "ELK", + "emr": "Expression Materializing Reasoner", + "hermit": "HermiT", + "jfact": "JFact", + "structural": "Structural Reasoner", + "whelk": "Whelk", + } + ) + ), + name="reasoner", + label="Reasoner", + description="Reasoner option.", + default_value="elk", + ), + ], +) +class RobotReasonPlugin(WorkflowPlugin): + """Robot reasoning plugin""" + + def __init__( + self, data_graph_iri: str, ontology_graph_iri: str, result_iri: str, reasoner: str + ) -> None: + """Init""" + self.data_graph_iri = data_graph_iri + self.ontology_graph_iri = ontology_graph_iri + self.result_iri = result_iri + self.reasoner = reasoner + self.temp = f"robot_{uuid4().hex}" + if not Path(self.temp).exists(): + Path(self.temp).mkdir(parents=True) + + def create_xml_catalog_file(self, graphs: dict) -> None: + """Create XML catalog file""" + file_name = Path(self.temp) / "catalog-v001.xml" + catalog = Element("catalog") + catalog.set("prefer", "public") + catalog.set("xmlns", "urn:oasis:names:tc:entity:xmlns:xml:catalog") + for graph in graphs: + uri = SubElement(catalog, "uri") + uri.set("id", "Auto-generated import resolution by cmem-plugin-robotreason") + uri.set("name", graph) + uri.set("uri", graphs[graph]) + reparsed = minidom.parseString(tostring(catalog, "utf-8")).toxml() # nosec + with Path(file_name).open("w", encoding="utf-8") as file: + file.truncate(0) + file.write(reparsed) + + def get_graphs(self, graphs: dict) -> None: + """Get graphs from CMEM""" + for graph in graphs: + with (Path(self.temp) / graphs[graph]).open("w", encoding="utf-8") as file: + file.write(get(graph).text) + file.write( + f"<{graph}> " + f" " + f" .\n" + ) + + def get_graphs_tree(self) -> dict: + """Get graph import tree""" + graphs = {} + for graph_iri in (self.data_graph_iri, self.ontology_graph_iri): + if graph_iri not in graphs: + graphs[graph_iri] = convert_iri_to_filename(graph_iri) + tree = get_graph_import_tree(graph_iri) + for value in tree["tree"].values(): + for iri in value: + if iri not in graphs: + graphs[iri] = convert_iri_to_filename(iri) + return graphs + + def reason(self, graphs: dict) -> None: + """Reason""" + inputs = "" + for value in graphs.values(): + inputs += f' --input "{self.temp}/{value}"' + utctime = str(datetime.fromtimestamp(int(time()), tz=UTC))[:-6].replace(" ", "T") + "Z" + cmd = ( + f"{ROBOT} merge{inputs} --collapse-import-closure false " + f"reason --reasoner {self.reasoner} " + f'--axiom-generators "ClassAssertion PropertyAssertion" ' + f"--include-indirect true " + f"--exclude-duplicate-axioms true " + f"--exclude-owl-thing true " + f"--exclude-tautologies all " + f"--exclude-external-entities " + f"reduce --reasoner {self.reasoner} " + f"unmerge{inputs} " + f'annotate --ontology-iri "{self.result_iri}" ' + f"--remove-annotations " + f'--language-annotation rdfs:label "Eccenca Reasoning Result {utctime}" en ' + f"--language-annotation rdfs:comment " + f'"Reasoning result set of <{self.data_graph_iri}> and ' + f'<{self.ontology_graph_iri}>" en ' + f"--language-annotation prov:wasGeneratedBy " + f'"cmem-plugin-robotreason ({self.reasoner})" en ' + f'--link-annotation prov:wasDerivedFrom "{self.data_graph_iri}" ' + f"--link-annotation prov:wasDerivedFrom " + f'"{self.ontology_graph_iri}" ' + f'--typed-annotation dc:created "{utctime}" xsd:dateTime ' + f'--output "{self.temp}/result.ttl"' + ) + run(shlex.split(cmd), check=False) # noqa: S603 + + def send_result(self) -> None: + """Send result""" + post_streamed( + self.result_iri, + str(Path(self.temp) / "result.ttl"), + replace=True, + content_type="text/turtle", + ) + + def clean_up(self, graphs: dict) -> None: + """Remove temporary files""" + files = ["catalog-v001.xml", "result.ttl"] + files += list(graphs.values()) + for file in files: + try: + (Path(self.temp) / file).unlink() + except (OSError, FileNotFoundError) as err: + self.log.warning(f"Cannot remove file {file} ({err})") + try: + Path(self.temp).rmdir() + except (OSError, FileNotFoundError) as err: + self.log.warning(f"Cannot remove directory {self.temp} ({err})") + + def execute(self, inputs: Sequence[Entities], context: ExecutionContext) -> None: # noqa: ARG002 + """Execute plugin""" + setup_cmempy_user_access(context.user) + graphs = self.get_graphs_tree() + self.get_graphs(graphs) + self.create_xml_catalog_file(graphs) + self.reason(graphs) + self.send_result() + self.clean_up(graphs) diff --git a/tests/test_robotreason.py b/tests/test_robotreason.py index a589739..08fdfa0 100644 --- a/tests/test_robotreason.py +++ b/tests/test_robotreason.py @@ -7,7 +7,7 @@ from rdflib import DCTERMS, OWL, RDF, RDFS, Graph, URIRef from rdflib.compare import to_isomorphic -from cmem_plugin_robotreason import RobotReasonPlugin +from cmem_plugin_robotreason.plugin_robotreason import RobotReasonPlugin from tests.utils import TestExecutionContext, needs_cmem from . import __path__